ROS OpenCL
ros_opencl.cpp
Go to the documentation of this file.
2 
3 namespace ros_opencl {
4 
5  // Private Methods
6 
8  clReleaseKernel (kernel);
9  clReleaseProgram (program);
10  clReleaseContext (context);
11  }
12 
13  std::string ROS_OpenCL::getPlatformName (const cl_platform_id id){
14  size_t size = 0;
15  clGetPlatformInfo (id, CL_PLATFORM_NAME, 0, NULL, &size);
16 
17  std::string result;
18  result.resize (size);
19  clGetPlatformInfo (id, CL_PLATFORM_NAME, size, const_cast<char*> (result.data ()), NULL);
20 
21  return result;
22  }
23 
24  std::string ROS_OpenCL::getDeviceName (const cl_device_id id){
25  size_t size = 0;
26  clGetDeviceInfo (id, CL_DEVICE_NAME, 0, NULL, &size);
27 
28  std::string result;
29  result.resize (size);
30  clGetDeviceInfo (id, CL_DEVICE_NAME, size, const_cast<char*> (result.data ()), NULL);
31 
32  return result;
33  }
34 
35  void ROS_OpenCL::checkError (const cl_int error){
36  if (error != CL_SUCCESS) {
37  std::string error_message = "";
38 
39  switch(error){
40  case CL_DEVICE_NOT_FOUND:
41  error_message = "CL_DEVICE_NOT_FOUND";
42  break;
43  case CL_DEVICE_NOT_AVAILABLE:
44  error_message = "CL_DEVICE_NOT_AVAILABLE";
45  break;
46  case CL_COMPILER_NOT_AVAILABLE:
47  error_message = "CL_COMPILER_NOT_AVAILABLE";
48  break;
49  case CL_MEM_OBJECT_ALLOCATION_FAILURE:
50  error_message = "CL_MEM_OBJECT_ALLOCATION_FAILURE";
51  break;
52  case CL_OUT_OF_RESOURCES:
53  error_message = "CL_OUT_OF_RESOURCES";
54  break;
55  case CL_OUT_OF_HOST_MEMORY:
56  error_message = "CL_OUT_OF_HOST_MEMORY";
57  break;
58  case CL_PROFILING_INFO_NOT_AVAILABLE:
59  error_message = "CL_PROFILING_INFO_NOT_AVAILABLE";
60  break;
61  case CL_MEM_COPY_OVERLAP:
62  error_message = "CL_MEM_COPY_OVERLAP";
63  break;
64  case CL_IMAGE_FORMAT_MISMATCH:
65  error_message = "CL_IMAGE_FORMAT_MISMATCH";
66  break;
67  case CL_IMAGE_FORMAT_NOT_SUPPORTED:
68  error_message = "CL_IMAGE_FORMAT_NOT_SUPPORTED";
69  break;
70  case CL_BUILD_PROGRAM_FAILURE:
71  error_message = "CL_BUILD_PROGRAM_FAILURE";
72  break;
73  case CL_MAP_FAILURE:
74  error_message = "CL_MAP_FAILURE";
75  break;
76  case CL_MISALIGNED_SUB_BUFFER_OFFSET:
77  error_message = "CL_MISALIGNED_SUB_BUFFER_OFFSET";
78  break;
79  case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
80  error_message = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
81  break;
82  case CL_COMPILE_PROGRAM_FAILURE:
83  error_message = "CL_COMPILE_PROGRAM_FAILURE";
84  break;
85  case CL_LINKER_NOT_AVAILABLE:
86  error_message = "CL_LINKER_NOT_AVAILABLE";
87  break;
88  case CL_LINK_PROGRAM_FAILURE:
89  error_message = "CL_LINK_PROGRAM_FAILURE";
90  break;
91  case CL_DEVICE_PARTITION_FAILED:
92  error_message = "CL_DEVICE_PARTITION_FAILED";
93  break;
94  case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
95  error_message = "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
96  break;
97  case CL_INVALID_VALUE:
98  error_message = "CL_INVALID_VALUE";
99  break;
100  case CL_INVALID_DEVICE_TYPE:
101  error_message = "CL_INVALID_DEVICE_TYPE";
102  break;
103  case CL_INVALID_PLATFORM:
104  error_message = "CL_INVALID_PLATFORM";
105  break;
106  case CL_INVALID_DEVICE:
107  error_message = "CL_INVALID_DEVICE";
108  break;
109  case CL_INVALID_CONTEXT:
110  error_message = "CL_INVALID_CONTEXT";
111  break;
112  case CL_INVALID_QUEUE_PROPERTIES:
113  error_message = "CL_INVALID_QUEUE_PROPERTIES";
114  break;
115  case CL_INVALID_COMMAND_QUEUE:
116  error_message = "CL_INVALID_COMMAND_QUEUE";
117  break;
118  case CL_INVALID_HOST_PTR:
119  error_message = "CL_INVALID_HOST_PTR";
120  break;
121  case CL_INVALID_MEM_OBJECT:
122  error_message = "CL_INVALID_MEM_OBJECT";
123  break;
124  case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
125  error_message = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
126  break;
127  case CL_INVALID_BUILD_OPTIONS:
128  error_message = "CL_INVALID_BUILD_OPTIONS";
129  break;
130  case CL_INVALID_PROGRAM:
131  error_message = "CL_INVALID_PROGRAM";
132  break;
133  case CL_INVALID_BINARY:
134  error_message = "CL_INVALID_BINARY";
135  break;
136  case CL_INVALID_PROGRAM_EXECUTABLE:
137  error_message = "CL_INVALID_PROGRAM_EXECUTABLE";
138  break;
139  case CL_INVALID_KERNEL_NAME:
140  error_message = "CL_INVALID_KERNEL_NAME";
141  break;
142  case CL_INVALID_KERNEL_DEFINITION:
143  error_message = "CL_INVALID_KERNEL_DEFINITION";
144  break;
145  case CL_INVALID_KERNEL:
146  error_message = "CL_INVALID_KERNEL";
147  break;
148  case CL_INVALID_ARG_INDEX:
149  error_message = "CL_INVALID_ARG_INDEX";
150  break;
151  case CL_INVALID_ARG_VALUE:
152  error_message = "CL_INVALID_ARG_VALUE";
153  break;
154  case CL_INVALID_ARG_SIZE:
155  error_message = "CL_INVALID_ARG_SIZE";
156  break;
157  case CL_INVALID_KERNEL_ARGS:
158  error_message = "CL_INVALID_KERNEL_ARGS";
159  break;
160  case CL_INVALID_WORK_DIMENSION:
161  error_message = "CL_INVALID_WORK_DIMENSION";
162  break;
163  case CL_INVALID_WORK_GROUP_SIZE:
164  error_message = "CL_INVALID_WORK_GROUP_SIZE";
165  break;
166  case CL_INVALID_WORK_ITEM_SIZE:
167  error_message = "CL_INVALID_WORK_ITEM_SIZE";
168  break;
169  case CL_INVALID_GLOBAL_OFFSET:
170  error_message = "CL_INVALID_GLOBAL_OFFSET";
171  break;
172  case CL_INVALID_EVENT_WAIT_LIST:
173  error_message = "CL_INVALID_EVENT_WAIT_LIST";
174  break;
175  case CL_INVALID_EVENT:
176  error_message = "CL_INVALID_EVENT";
177  break;
178  case CL_INVALID_OPERATION:
179  error_message = "CL_INVALID_OPERATION";
180  break;
181  case CL_INVALID_GL_OBJECT:
182  error_message = "CL_INVALID_GL_OBJECT";
183  break;
184  case CL_INVALID_BUFFER_SIZE:
185  error_message = "CL_INVALID_BUFFER_SIZE";
186  break;
187  case CL_INVALID_MIP_LEVEL:
188  error_message = "CL_INVALID_MIP_LEVEL";
189  break;
190  case CL_INVALID_GLOBAL_WORK_SIZE:
191  error_message = "CL_INVALID_GLOBAL_WORK_SIZE";
192  break;
193  case CL_INVALID_PROPERTY:
194  error_message = "CL_INVALID_PROPERTY";
195  break;
196  case CL_INVALID_IMAGE_DESCRIPTOR:
197  error_message = "CL_INVALID_IMAGE_DESCRIPTOR";
198  break;
199  case CL_INVALID_COMPILER_OPTIONS:
200  error_message = "CL_INVALID_COMPILER_OPTIONS";
201  break;
202  case CL_INVALID_LINKER_OPTIONS:
203  error_message = "CL_INVALID_LINKER_OPTIONS";
204  break;
205  case CL_INVALID_DEVICE_PARTITION_COUNT:
206  error_message = "CL_INVALID_DEVICE_PARTITION_COUNT";
207  break;
208  }
209 
210  ROS_ERROR("OpenCL call failed with error: %d: %s", error, error_message.c_str());
211  exit (1);
212  }
213  }
214 
215  std::string ROS_OpenCL::LoadKernel (const char* name){
216  std::ifstream in (name);
217  std::string result((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
218  return result;
219  }
220 
221  cl_program ROS_OpenCL::createProgram (const std::string& source, const cl_context context){
222  size_t lengths [1] = { source.size () };
223  const char* sources [1] = { source.data () };
224 
225  cl_int error = 0;
226  cl_program program_ = clCreateProgramWithSource(context, 1, sources, lengths, &error);
227  checkError (error);
228 
229  return program_;
230  }
231 
232  // Public Methods
233 
234  ROS_OpenCL::ROS_OpenCL(const std::string full_kernel_path, const std::string kernel_function){
235  cl_uint platformIdCount = 0;
236  clGetPlatformIDs (0, NULL, &platformIdCount);
237 
238  if (platformIdCount == 0) {
239  ROS_ERROR("No OpenCL platform found");
240  exit (1);
241  }
242  else {
243  ROS_INFO("Found %d platform(s)", platformIdCount);
244  }
245 
246  std::vector<cl_platform_id> platformIds (platformIdCount);
247  clGetPlatformIDs (platformIdCount, platformIds.data (), NULL);
248 
249  for (cl_uint i = 0; i < platformIdCount; ++i) {
250  ROS_INFO("\t (%d) : %s", i+1, getPlatformName (platformIds [i]).c_str());
251  }
252 
253  cl_uint deviceIdCount = 0;
254  clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceIdCount);
255 
256  if (deviceIdCount == 0) {
257  ROS_ERROR("No OpenCL devices found");
258  exit (1);
259  }
260  else {
261  ROS_INFO("Found %d device(s)", deviceIdCount);
262  }
263 
264  deviceIds = std::vector<cl_device_id>(deviceIdCount);
265  clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data(), NULL);
266 
267  for (cl_uint i = 0; i < deviceIdCount; ++i) {
268  ROS_INFO("\t (%d) : %s", i+1, getDeviceName (deviceIds [i]).c_str());
269  }
270 
271  const cl_context_properties contextProperties [] = {CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties> (platformIds [0]), 0, 0};
272 
273  cl_int error = CL_SUCCESS;
274  context = clCreateContext (contextProperties, deviceIdCount, deviceIds.data (), NULL, NULL, &error);
275  checkError (error);
276 
277  ROS_INFO("Context created");
278 
279  program = createProgram (LoadKernel (full_kernel_path.c_str()), context);
280 
281  checkError (clBuildProgram (program, deviceIdCount, deviceIds.data (), "-D FILTER_SIZE=1", NULL, NULL));
282 
283  ROS_INFO("Program built");
284 
285  kernel = clCreateKernel (program, kernel_function.c_str(), &error);
286  checkError (error);
287 
288  ROS_INFO("Kernel created");
289  }
290 
292  if(!deviceIds.empty()){
293  clean();
294  }
295  }
296 
298  std::swap(kernel, s->kernel);
299  std::swap(context, s->context);
300  std::swap(program, s->program);
301  std::swap(deviceIds, s->deviceIds);
302  return *this;
303  }
304 
305  sensor_msgs::PointCloud2 ROS_OpenCL::process(const sensor_msgs::PointCloud2& msg){
306  size_t sz = msg.data.size();
307  cl_int error = 0;
308  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, &error);
309  checkError(error);
310  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
311  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
312 
313  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg.data[0], 0, NULL, NULL);
314  checkError (error);
315 
316  size_t size = sz;
317 
318  cl_event gpuExec;
319 
320  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
321 
322  clWaitForEvents(1, &gpuExec);
323 
324  uint8_t *result = (uint8_t *) malloc(sz);
325  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
326 
327  sensor_msgs::PointCloud2 res = sensor_msgs::PointCloud2(msg);
328  res.data.assign(result, result+sz);
329 
330  clReleaseCommandQueue (queue);
331  clReleaseMemObject(buffer);
332  clReleaseEvent(gpuExec);
333  free(result);
334 
335  return res;
336  }
337 
338  void ROS_OpenCL::process(sensor_msgs::PointCloud2::Ptr msg){
339  size_t sz = msg->data.size();
340  cl_int error = 0;
341  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, &error);
342  checkError(error);
343  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
344  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
345 
346  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg->data[0], 0, NULL, NULL);
347  checkError (error);
348 
349  size_t size = sz;
350 
351  cl_event gpuExec;
352 
353  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
354 
355  clWaitForEvents(1, &gpuExec);
356 
357  uint8_t *result = (uint8_t *) malloc(sz);
358  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
359 
360  msg->data.assign(result, result+sz);
361 
362  clReleaseCommandQueue (queue);
363  clReleaseMemObject(buffer);
364  clReleaseEvent(gpuExec);
365  free(result);
366  }
367 
368  sensor_msgs::LaserScan ROS_OpenCL::process(const sensor_msgs::LaserScan& msg){
369  size_t sz = msg.ranges.size();
370  cl_int typesz = sizeof(float) * sz;
371  cl_int error = 0;
372  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
373  checkError(error);
374  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
375  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
376 
377  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &msg.ranges[0], 0, NULL, NULL);
378  checkError (error);
379 
380  size_t size = sz;
381 
382  cl_event gpuExec;
383 
384  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
385 
386  clWaitForEvents(1, &gpuExec);
387 
388  float *result = (float *) malloc(typesz);
389  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
390 
391  sensor_msgs::LaserScan res = sensor_msgs::LaserScan(msg);
392  res.ranges.assign(result, result+sz);
393 
394  clReleaseCommandQueue (queue);
395  clReleaseMemObject(buffer);
396  clReleaseEvent(gpuExec);
397  free(result);
398 
399  return res;
400  }
401 
402  void ROS_OpenCL::process(sensor_msgs::LaserScan::Ptr msg){
403  size_t sz = msg->ranges.size();
404  cl_int typesz = sizeof(float) * sz;
405  cl_int error = 0;
406  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
407  checkError(error);
408  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
409  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
410 
411  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &msg->ranges[0], 0, NULL, NULL);
412  checkError (error);
413 
414  size_t size = sz;
415 
416  cl_event gpuExec;
417 
418  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
419 
420  clWaitForEvents(1, &gpuExec);
421 
422  float *result = (float *) malloc(typesz);
423  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
424 
425  msg->ranges.assign(result, result+sz);
426 
427  clReleaseCommandQueue (queue);
428  clReleaseMemObject(buffer);
429  clReleaseEvent(gpuExec);
430  free(result);
431  }
432 
433  sensor_msgs::Image ROS_OpenCL::process(const sensor_msgs::Image& msg){
434  size_t sz = msg.data.size();
435  cl_int error = 0;
436  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, &error);
437  checkError(error);
438  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
439  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
440 
441  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg.data[0], 0, NULL, NULL);
442  checkError (error);
443 
444  size_t size = sz;
445 
446  cl_event gpuExec;
447 
448  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
449 
450  clWaitForEvents(1, &gpuExec);
451 
452  uint8_t *result = (uint8_t *) malloc(sz);
453  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
454 
455  sensor_msgs::Image res = sensor_msgs::Image(msg);
456  res.data.assign(result, result+sz);
457 
458  clReleaseCommandQueue (queue);
459  clReleaseMemObject(buffer);
460  clReleaseEvent(gpuExec);
461  free(result);
462 
463  return res;
464  }
465 
466  void ROS_OpenCL::process(sensor_msgs::Image::Ptr msg){
467  size_t sz = msg->data.size();
468  cl_int error = 0;
469  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, &error);
470  checkError(error);
471  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
472  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
473 
474  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg->data[0], 0, NULL, NULL);
475  checkError (error);
476 
477  size_t size = sz;
478 
479  cl_event gpuExec;
480 
481  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
482 
483  clWaitForEvents(1, &gpuExec);
484 
485  uint8_t *result = (uint8_t *) malloc(sz);
486  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
487 
488  msg->data.assign(result, result+sz);
489 
490  clReleaseCommandQueue (queue);
491  clReleaseMemObject(buffer);
492  clReleaseEvent(gpuExec);
493  free(result);
494  }
495 
496  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const ROS_OpenCL_Params* params){
497  size_t sz = v.size();
498  cl_int typesz = sizeof(float) * sz;
499  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
500  if (temp_sz > 0){
501  if (temp_sz != 1){
502  ROS_WARN("buffers_size includes more elements than needed! Using only the first...");
503  }
504  typesz = sizeof(float) * params->buffers_size[0];
505  }
506  cl_int error = 0;
507  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
508  checkError(error);
509  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
510  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
511 
512  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
513  checkError (error);
514 
515  size_t size = sz;
516  temp_sz = params != NULL ? params->global_work_size.size() : 0;
517  if (temp_sz > 0){
518  if (temp_sz != 1){
519  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
520  }
521  size = params->global_work_size[0];
522  }
523 
524  cl_event gpuExec;
525 
526  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
527 
528  clWaitForEvents(1, &gpuExec);
529 
530  float *result = (float *) malloc(typesz);
531  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
532 
533  std::vector<float> res = std::vector<float>();
534 
535  if (params != NULL and params->buffers_size.size() > 0){
536  res.assign(result, result+params->buffers_size[0]);
537  }
538  else{
539  res.assign(result, result+sz);
540  }
541 
542  clReleaseCommandQueue (queue);
543  clReleaseMemObject(buffer);
544  clReleaseEvent(gpuExec);
545  free(result);
546 
547  return res;
548  }
549 
550  void ROS_OpenCL::process(std::vector<float>* v, const ROS_OpenCL_Params* params){
551  size_t sz = v->size();
552  cl_int typesz = sizeof(float) * sz;
553  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
554  if (temp_sz > 0){
555  if (temp_sz != 1){
556  ROS_WARN("buffer_size includes more elements than needed! Using only the first... Using only the first...");
557  }
558  typesz = sizeof(float) * params->buffers_size[0];
559  }
560  cl_int error = 0;
561  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
562  checkError(error);
563  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
564  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
565 
566  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
567  checkError (error);
568 
569  size_t size = sz;
570  temp_sz = params != NULL ? params->global_work_size.size() : 0;
571  if (temp_sz > 0){
572  if (temp_sz != 1){
573  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
574  }
575  size = params->global_work_size[0];
576  }
577 
578  cl_event gpuExec;
579 
580  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
581 
582  clWaitForEvents(1, &gpuExec);
583 
584  float *result = (float *) malloc(typesz);
585  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
586 
587  if (params != NULL and params->buffers_size.size() > 0){
588  v->assign(result, result+params->buffers_size[0]);
589  }
590  else{
591  v->assign(result, result+sz);
592  }
593 
594  clReleaseCommandQueue (queue);
595  clReleaseMemObject(buffer);
596  clReleaseEvent(gpuExec);
597  free(result);
598  }
599 
600  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const ROS_OpenCL_Params* params){
601  size_t sz = v.size();
602  cl_int typesz = sizeof(double) * sz;
603  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
604  if (temp_sz > 0){
605  if (temp_sz != 1){
606  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
607  }
608  typesz = sizeof(float) * params->buffers_size[0];
609  }
610  cl_int error = 0;
611  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
612  checkError(error);
613  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
614  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
615 
616  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
617  checkError (error);
618 
619  size_t size = sz;
620  temp_sz = params != NULL ? params->global_work_size.size() : 0;
621  if (temp_sz > 0){
622  if (temp_sz != 1){
623  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
624  }
625  size = params->global_work_size[0];
626  }
627 
628  cl_event gpuExec;
629 
630  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
631 
632  clWaitForEvents(1, &gpuExec);
633 
634  double *result = (double *) malloc(typesz);
635  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
636 
637  std::vector<double> res = std::vector<double>();
638 
639  if (params != NULL and params->buffers_size.size() > 0){
640  res.assign(result, result+params->buffers_size[0]);
641  }
642  else{
643  res.assign(result, result+sz);
644  }
645 
646  clReleaseCommandQueue (queue);
647  clReleaseMemObject(buffer);
648  clReleaseEvent(gpuExec);
649  free(result);
650 
651  return res;
652  }
653 
654  void ROS_OpenCL::process(std::vector<double>* v, const ROS_OpenCL_Params* params){
655  size_t sz = v->size();
656  cl_int typesz = sizeof(double) * sz;
657  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
658  if (temp_sz > 0){
659  if (temp_sz != 1){
660  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
661  }
662  typesz = sizeof(float) * params->buffers_size[0];
663  }
664  cl_int error = 0;
665  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
666  checkError(error);
667  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
668  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
669 
670  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
671  checkError (error);
672 
673  size_t size = sz;
674  temp_sz = params != NULL ? params->global_work_size.size() : 0;
675  if (temp_sz > 0){
676  if (temp_sz != 1){
677  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
678  }
679  size = params->global_work_size[0];
680  }
681 
682  cl_event gpuExec;
683 
684  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
685 
686  clWaitForEvents(1, &gpuExec);
687 
688  double *result = (double *) malloc(typesz);
689  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
690 
691  if (params != NULL and params->buffers_size.size() > 0){
692  v->assign(result, result+params->buffers_size[0]);
693  }
694  else{
695  v->assign(result, result+sz);
696  }
697 
698  clReleaseCommandQueue (queue);
699  clReleaseMemObject(buffer);
700  clReleaseEvent(gpuExec);
701  free(result);
702  }
703 
704  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const ROS_OpenCL_Params* params){
705  size_t sz = v.size();
706  cl_int typesz = sizeof(int) * sz;
707  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
708  if (temp_sz > 0){
709  if (temp_sz != 1){
710  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
711  }
712  typesz = sizeof(float) * params->buffers_size[0];
713  }
714  cl_int error = 0;
715  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
716  checkError(error);
717  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
718  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
719 
720  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
721  checkError (error);
722 
723  size_t size = sz;
724  temp_sz = params != NULL ? params->global_work_size.size() : 0;
725  if (temp_sz > 0){
726  if (temp_sz != 1){
727  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
728  }
729  size = params->global_work_size[0];
730  }
731 
732  cl_event gpuExec;
733 
734  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
735 
736  clWaitForEvents(1, &gpuExec);
737 
738  int *result = (int *) malloc(typesz);
739  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
740 
741  std::vector<int> res = std::vector<int>();
742 
743  if (params != NULL and params->buffers_size.size() > 0){
744  res.assign(result, result+params->buffers_size[0]);
745  }
746  else{
747  res.assign(result, result+sz);
748  }
749 
750  clReleaseCommandQueue (queue);
751  clReleaseMemObject(buffer);
752  clReleaseEvent(gpuExec);
753  free(result);
754 
755  return res;
756  }
757 
758  void ROS_OpenCL::process(std::vector<int>* v, const ROS_OpenCL_Params* params){
759  size_t sz = v->size();
760  cl_int typesz = sizeof(int) * sz;
761  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
762  if (temp_sz > 0){
763  if (temp_sz != 1){
764  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
765  }
766  typesz = sizeof(float) * params->buffers_size[0];
767  }
768  cl_int error = 0;
769  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
770  checkError(error);
771  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
772  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
773 
774  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
775  checkError (error);
776 
777  size_t size = sz;
778  temp_sz = params != NULL ? params->global_work_size.size() : 0;
779  if (temp_sz > 0){
780  if (temp_sz != 1){
781  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
782  }
783  size = params->global_work_size[0];
784  }
785 
786  cl_event gpuExec;
787 
788  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
789 
790  clWaitForEvents(1, &gpuExec);
791 
792  int *result = (int *) malloc(typesz);
793  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
794 
795  if (params != NULL and params->buffers_size.size() > 0){
796  v->assign(result, result+params->buffers_size[0]);
797  }
798  else{
799  v->assign(result, result+sz);
800  }
801 
802  clReleaseCommandQueue (queue);
803  clReleaseMemObject(buffer);
804  clReleaseEvent(gpuExec);
805  free(result);
806  }
807 
808  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const ROS_OpenCL_Params* params){
809  size_t sz = v.size();
810  cl_int typesz = sizeof(char) * sz;
811  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
812  if (temp_sz > 0){
813  if (temp_sz != 1){
814  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
815  }
816  typesz = sizeof(float) * params->buffers_size[0];
817  }
818  cl_int error = 0;
819  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
820  checkError(error);
821  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
822  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
823 
824  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
825  checkError (error);
826 
827  size_t size = sz;
828  temp_sz = params != NULL ? params->global_work_size.size() : 0;
829  if (temp_sz > 0){
830  if (temp_sz != 1){
831  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
832  }
833  size = params->global_work_size[0];
834  }
835 
836  cl_event gpuExec;
837 
838  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
839 
840  clWaitForEvents(1, &gpuExec);
841 
842  char *result = (char *) malloc(typesz);
843  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
844 
845  std::vector<char> res = std::vector<char>();
846 
847  if (params != NULL and params->buffers_size.size() > 0){
848  res.assign(result, result+params->buffers_size[0]);
849  }
850  else{
851  res.assign(result, result+sz);
852  }
853 
854  clReleaseCommandQueue (queue);
855  clReleaseMemObject(buffer);
856  clReleaseEvent(gpuExec);
857  free(result);
858 
859  return res;
860  }
861 
862  void ROS_OpenCL::process(std::vector<char>* v, const ROS_OpenCL_Params* params){
863  size_t sz = v->size();
864  cl_int typesz = sizeof(char) * sz;
865  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
866  if (temp_sz > 0){
867  if (temp_sz != 1){
868  ROS_WARN("buffer_size includes more elements than needed! Using only the first...");
869  }
870  typesz = sizeof(float) * params->buffers_size[0];
871  }
872  cl_int error = 0;
873  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
874  checkError(error);
875  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
876  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
877 
878  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
879  checkError (error);
880 
881  size_t size = sz;
882  temp_sz = params != NULL ? params->global_work_size.size() : 0;
883  if (temp_sz > 0){
884  if (temp_sz != 1){
885  ROS_WARN("global_work_size includes more elements than needed! Using only the first...");
886  }
887  size = params->global_work_size[0];
888  }
889 
890  cl_event gpuExec;
891 
892  checkError (clEnqueueNDRangeKernel (queue, kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
893 
894  clWaitForEvents(1, &gpuExec);
895 
896  char *result = (char *) malloc(typesz);
897  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
898 
899  if (params != NULL and params->buffers_size.size() > 0){
900  v->assign(result, result+params->buffers_size[0]);
901  }
902  else{
903  v->assign(result, result+sz);
904  }
905 
906  clReleaseCommandQueue (queue);
907  clReleaseMemObject(buffer);
908  clReleaseEvent(gpuExec);
909  free(result);
910  }
911 
912  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
913  size_t sz = v.size();
914  size_t sz2 = v2.size();
915  size_t typesz = sizeof(char) * sz;
916  size_t typesz2 = sizeof(char) * sz2;
917  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
918  if (temp_sz > 0){
919  if (temp_sz > 1){
920  if (temp_sz > 2){
921  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
922  }
923  typesz = sizeof(char) * params->buffers_size[0];
924  typesz2 = sizeof(char) * params->buffers_size[1];
925  }
926  else{
927  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
928  }
929  }
930  cl_int error = 0;
931  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
932  checkError(error);
933  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
934  checkError(error);
935  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
936  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
937  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
938 
939  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
940  checkError (error);
941  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
942  checkError (error);
943 
944  size_t size[2] = {sz, sz2};
945  size_t work_dimension = 2;
946 
947  temp_sz = params != NULL ? params->global_work_size.size() : 0;
948  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
949  work_dimension--;
950  }
951  else if(temp_sz > 0){
952  if (params->multi_dimensional){
953  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
954  For default multidimensional global work size, leave the global_work_size vector empty, \
955  and set multi_dimensional to true. Setting the global work size based on the values inside \
956  the global_work_size vector.");
957  }
958  if (temp_sz == 1){
959  size[0] = params->global_work_size[0];
960  work_dimension--;
961  }
962  else{
963  size[0] = params->global_work_size[0];
964  size[1] = params->global_work_size[1];
965  if (temp_sz > 2){
966  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
967  }
968  }
969  }
970 
971  cl_event gpuExec;
972 
973  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
974 
975  clWaitForEvents(1, &gpuExec);
976 
977  char *result = (char *) malloc(typesz);
978  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
979 
980  std::vector<char> res = std::vector<char>();
981  res.assign(result, result+sz);
982 
983  clReleaseCommandQueue (queue);
984  clReleaseMemObject(buffer);
985  clReleaseMemObject(buffer2);
986  clReleaseEvent(gpuExec);
987  free(result);
988 
989  return res;
990  }
991 
992  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
993  size_t sz = v->size();
994  size_t sz2 = v2.size();
995  size_t typesz = sizeof(char) * sz;
996  size_t typesz2 = sizeof(char) * sz2;
997  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
998  if (temp_sz > 0){
999  if (temp_sz > 1){
1000  if (temp_sz > 2){
1001  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1002  }
1003  typesz = sizeof(char) * params->buffers_size[0];
1004  typesz2 = sizeof(char) * params->buffers_size[1];
1005  }
1006  else{
1007  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1008  }
1009  }
1010  cl_int error = 0;
1011  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1012  checkError(error);
1013  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1014  checkError(error);
1015  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1016  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1017  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1018 
1019  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1020  checkError (error);
1021  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1022  checkError (error);
1023 
1024  size_t size[2] = {sz, sz2};
1025  size_t work_dimension = 2;
1026 
1027  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1028  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1029  work_dimension--;
1030  }
1031  else if(temp_sz > 0){
1032  if (params->multi_dimensional){
1033  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1034  For default multidimensional global work size, leave the global_work_size vector empty, \
1035  and set multi_dimensional to true. Setting the global work size based on the values inside \
1036  the global_work_size vector.");
1037  }
1038  if (temp_sz == 1){
1039  size[0] = params->global_work_size[0];
1040  work_dimension--;
1041  }
1042  else{
1043  size[0] = params->global_work_size[0];
1044  size[1] = params->global_work_size[1];
1045  if (temp_sz > 2){
1046  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1047  }
1048  }
1049  }
1050 
1051  cl_event gpuExec;
1052 
1053  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1054 
1055  clWaitForEvents(1, &gpuExec);
1056 
1057  char *result = (char *) malloc(typesz);
1058  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1059 
1060  v->assign(result, result+sz);
1061 
1062  clReleaseCommandQueue (queue);
1063  clReleaseMemObject(buffer);
1064  clReleaseMemObject(buffer2);
1065  clReleaseEvent(gpuExec);
1066  free(result);
1067  }
1068 
1069  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, const ROS_OpenCL_Params* params){
1070  size_t sz = v->size();
1071  size_t sz2 = v2->size();
1072  size_t typesz = sizeof(char) * sz;
1073  size_t typesz2 = sizeof(char) * sz2;
1074  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1075  if (temp_sz > 0){
1076  if (temp_sz > 1){
1077  if (temp_sz > 2){
1078  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1079  }
1080  typesz = sizeof(char) * params->buffers_size[0];
1081  typesz2 = sizeof(char) * params->buffers_size[1];
1082  }
1083  else{
1084  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1085  }
1086  }
1087  cl_int error = 0;
1088  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1089  checkError(error);
1090  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1091  checkError(error);
1092  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1093  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1094  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1095 
1096  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1097  checkError (error);
1098  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1099  checkError (error);
1100 
1101  size_t size[2] = {sz, sz2};
1102  size_t work_dimension = 2;
1103 
1104  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1105  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1106  work_dimension--;
1107  }
1108  else if(temp_sz > 0){
1109  if (params->multi_dimensional){
1110  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1111  For default multidimensional global work size, leave the global_work_size vector empty, \
1112  and set multi_dimensional to true. Setting the global work size based on the values inside \
1113  the global_work_size vector.");
1114  }
1115  if (temp_sz == 1){
1116  size[0] = params->global_work_size[0];
1117  work_dimension--;
1118  }
1119  else{
1120  size[0] = params->global_work_size[0];
1121  size[1] = params->global_work_size[1];
1122  if (temp_sz > 2){
1123  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1124  }
1125  }
1126  }
1127 
1128  cl_event gpuExec;
1129 
1130  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1131 
1132  clWaitForEvents(1, &gpuExec);
1133 
1134  char *result = (char *) malloc(typesz);
1135  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1136 
1137  v->assign(result, result+sz);
1138 
1139  if (typesz2 != typesz or sz != sz2){
1140  char *result2;
1141  result2 = (char *) malloc(typesz2);
1142  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1143 
1144  v2->assign(result2, result2+sz2);
1145  free(result2);
1146  }
1147  else{
1148  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
1149 
1150  v2->assign(result, result+sz2);
1151  }
1152 
1153  clReleaseCommandQueue (queue);
1154  clReleaseMemObject(buffer);
1155  clReleaseMemObject(buffer2);
1156  clReleaseEvent(gpuExec);
1157  free(result);
1158  }
1159 
1160  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
1161  size_t sz = v.size();
1162  size_t sz2 = v2.size();
1163  size_t typesz = sizeof(char) * sz;
1164  size_t typesz2 = sizeof(int) * sz2;
1165  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1166  if (temp_sz > 0){
1167  if (temp_sz > 1){
1168  if (temp_sz > 2){
1169  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1170  }
1171  typesz = sizeof(char) * params->buffers_size[0];
1172  typesz2 = sizeof(int) * params->buffers_size[1];
1173  }
1174  else{
1175  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1176  }
1177  }
1178  cl_int error = 0;
1179  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1180  checkError(error);
1181  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1182  checkError(error);
1183  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1184  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1185  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1186 
1187  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1188  checkError (error);
1189  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1190  checkError (error);
1191 
1192  size_t size[2] = {sz, sz2};
1193  size_t work_dimension = 2;
1194 
1195  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1196  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1197  work_dimension--;
1198  }
1199  else if(temp_sz > 0){
1200  if (params->multi_dimensional){
1201  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1202  For default multidimensional global work size, leave the global_work_size vector empty, \
1203  and set multi_dimensional to true. Setting the global work size based on the values inside \
1204  the global_work_size vector.");
1205  }
1206  if (temp_sz == 1){
1207  size[0] = params->global_work_size[0];
1208  work_dimension--;
1209  }
1210  else{
1211  size[0] = params->global_work_size[0];
1212  size[1] = params->global_work_size[1];
1213  if (temp_sz > 2){
1214  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1215  }
1216  }
1217  }
1218 
1219  cl_event gpuExec;
1220 
1221  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1222 
1223  clWaitForEvents(1, &gpuExec);
1224 
1225  char *result = (char *) malloc(typesz);
1226  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1227 
1228  std::vector<char> res = std::vector<char>();
1229  res.assign(result, result+sz);
1230 
1231  clReleaseCommandQueue (queue);
1232  clReleaseMemObject(buffer);
1233  clReleaseMemObject(buffer2);
1234  clReleaseEvent(gpuExec);
1235  free(result);
1236 
1237  return res;
1238  }
1239 
1240  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
1241  size_t sz = v->size();
1242  size_t sz2 = v2.size();
1243  size_t typesz = sizeof(char) * sz;
1244  size_t typesz2 = sizeof(int) * sz2;
1245  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1246  if (temp_sz > 0){
1247  if (temp_sz > 1){
1248  if (temp_sz > 2){
1249  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1250  }
1251  typesz = sizeof(char) * params->buffers_size[0];
1252  typesz2 = sizeof(int) * params->buffers_size[1];
1253  }
1254  else{
1255  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1256  }
1257  }
1258  cl_int error = 0;
1259  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1260  checkError(error);
1261  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1262  checkError(error);
1263  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1264  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1265  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1266 
1267  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1268  checkError (error);
1269  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1270  checkError (error);
1271 
1272  size_t size[2] = {sz, sz2};
1273  size_t work_dimension = 2;
1274 
1275  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1276  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1277  work_dimension--;
1278  }
1279  else if(temp_sz > 0){
1280  if (params->multi_dimensional){
1281  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1282  For default multidimensional global work size, leave the global_work_size vector empty, \
1283  and set multi_dimensional to true. Setting the global work size based on the values inside \
1284  the global_work_size vector.");
1285  }
1286  if (temp_sz == 1){
1287  size[0] = params->global_work_size[0];
1288  work_dimension--;
1289  }
1290  else{
1291  size[0] = params->global_work_size[0];
1292  size[1] = params->global_work_size[1];
1293  if (temp_sz > 2){
1294  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1295  }
1296  }
1297  }
1298 
1299  cl_event gpuExec;
1300 
1301  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1302 
1303  clWaitForEvents(1, &gpuExec);
1304 
1305  char *result = (char *) malloc(typesz);
1306  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1307 
1308  v->assign(result, result+sz);
1309 
1310  clReleaseCommandQueue (queue);
1311  clReleaseMemObject(buffer);
1312  clReleaseMemObject(buffer2);
1313  clReleaseEvent(gpuExec);
1314  free(result);
1315  }
1316 
1317  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, const ROS_OpenCL_Params* params){
1318  size_t sz = v->size();
1319  size_t sz2 = v2->size();
1320  size_t typesz = sizeof(char) * sz;
1321  size_t typesz2 = sizeof(int) * sz2;
1322  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1323  if (temp_sz > 0){
1324  if (temp_sz > 1){
1325  if (temp_sz > 2){
1326  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1327  }
1328  typesz = sizeof(char) * params->buffers_size[0];
1329  typesz2 = sizeof(int) * params->buffers_size[1];
1330  }
1331  else{
1332  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1333  }
1334  }
1335  cl_int error = 0;
1336  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1337  checkError(error);
1338  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1339  checkError(error);
1340  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1341  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1342  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1343 
1344  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1345  checkError (error);
1346  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1347  checkError (error);
1348 
1349  size_t size[2] = {sz, sz2};
1350  size_t work_dimension = 2;
1351 
1352  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1353  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1354  work_dimension--;
1355  }
1356  else if(temp_sz > 0){
1357  if (params->multi_dimensional){
1358  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1359  For default multidimensional global work size, leave the global_work_size vector empty, \
1360  and set multi_dimensional to true. Setting the global work size based on the values inside \
1361  the global_work_size vector.");
1362  }
1363  if (temp_sz == 1){
1364  size[0] = params->global_work_size[0];
1365  work_dimension--;
1366  }
1367  else{
1368  size[0] = params->global_work_size[0];
1369  size[1] = params->global_work_size[1];
1370  if (temp_sz > 2){
1371  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1372  }
1373  }
1374  }
1375 
1376  cl_event gpuExec;
1377  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1378 
1379  clWaitForEvents(1, &gpuExec);
1380 
1381  char *result = (char *) malloc(typesz);
1382  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1383 
1384  v->assign(result, result+sz);
1385 
1386  int *result2 = (int *) malloc(typesz2);
1387  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1388 
1389  v2->assign(result2, result2+sz2);
1390 
1391  clReleaseCommandQueue (queue);
1392  clReleaseMemObject(buffer);
1393  clReleaseMemObject(buffer2);
1394  clReleaseEvent(gpuExec);
1395  free(result);
1396  free(result2);
1397  }
1398 
1399  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
1400  size_t sz = v.size();
1401  size_t sz2 = v2.size();
1402  size_t typesz = sizeof(char) * sz;
1403  size_t typesz2 = sizeof(float) * sz2;
1404  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1405  if (temp_sz > 0){
1406  if (temp_sz > 1){
1407  if (temp_sz > 2){
1408  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1409  }
1410  typesz = sizeof(char) * params->buffers_size[0];
1411  typesz2 = sizeof(float) * params->buffers_size[1];
1412  }
1413  else{
1414  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1415  }
1416  }
1417  cl_int error = 0;
1418  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1419  checkError(error);
1420  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1421  checkError(error);
1422  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1423  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1424  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1425 
1426  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1427  checkError (error);
1428  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1429  checkError (error);
1430 
1431  size_t size[2] = {sz, sz2};
1432  size_t work_dimension = 2;
1433 
1434  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1435  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1436  work_dimension--;
1437  }
1438  else if(temp_sz > 0){
1439  if (params->multi_dimensional){
1440  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1441  For default multidimensional global work size, leave the global_work_size vector empty, \
1442  and set multi_dimensional to true. Setting the global work size based on the values inside \
1443  the global_work_size vector.");
1444  }
1445  if (temp_sz == 1){
1446  size[0] = params->global_work_size[0];
1447  work_dimension--;
1448  }
1449  else{
1450  size[0] = params->global_work_size[0];
1451  size[1] = params->global_work_size[1];
1452  if (temp_sz > 2){
1453  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1454  }
1455  }
1456  }
1457 
1458  cl_event gpuExec;
1459 
1460  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1461 
1462  clWaitForEvents(1, &gpuExec);
1463 
1464  char *result = (char *) malloc(typesz);
1465  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1466 
1467  std::vector<char> res = std::vector<char>();
1468  res.assign(result, result+sz);
1469 
1470  clReleaseCommandQueue (queue);
1471  clReleaseMemObject(buffer);
1472  clReleaseMemObject(buffer2);
1473  clReleaseEvent(gpuExec);
1474  free(result);
1475 
1476  return res;
1477  }
1478 
1479  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
1480  size_t sz = v->size();
1481  size_t sz2 = v2.size();
1482  size_t typesz = sizeof(char) * sz;
1483  size_t typesz2 = sizeof(float) * sz2;
1484  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1485  if (temp_sz > 0){
1486  if (temp_sz > 1){
1487  if (temp_sz > 2){
1488  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1489  }
1490  typesz = sizeof(char) * params->buffers_size[0];
1491  typesz2 = sizeof(float) * params->buffers_size[1];
1492  }
1493  else{
1494  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1495  }
1496  }
1497  cl_int error = 0;
1498  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1499  checkError(error);
1500  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1501  checkError(error);
1502  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1503  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1504  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1505 
1506  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1507  checkError (error);
1508  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1509  checkError (error);
1510 
1511  size_t size[2] = {sz, sz2};
1512  size_t work_dimension = 2;
1513 
1514  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1515  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1516  work_dimension--;
1517  }
1518  else if(temp_sz > 0){
1519  if (params->multi_dimensional){
1520  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1521  For default multidimensional global work size, leave the global_work_size vector empty, \
1522  and set multi_dimensional to true. Setting the global work size based on the values inside \
1523  the global_work_size vector.");
1524  }
1525  if (temp_sz == 1){
1526  size[0] = params->global_work_size[0];
1527  work_dimension--;
1528  }
1529  else{
1530  size[0] = params->global_work_size[0];
1531  size[1] = params->global_work_size[1];
1532  if (temp_sz > 2){
1533  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1534  }
1535  }
1536  }
1537 
1538  cl_event gpuExec;
1539 
1540  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1541 
1542  clWaitForEvents(1, &gpuExec);
1543 
1544  char *result = (char *) malloc(typesz);
1545  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1546 
1547  v->assign(result, result+sz);
1548 
1549  clReleaseCommandQueue (queue);
1550  clReleaseMemObject(buffer);
1551  clReleaseMemObject(buffer2);
1552  clReleaseEvent(gpuExec);
1553  free(result);
1554  }
1555 
1556  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, const ROS_OpenCL_Params* params){
1557  size_t sz = v->size();
1558  size_t sz2 = v2->size();
1559  size_t typesz = sizeof(char) * sz;
1560  size_t typesz2 = sizeof(float) * sz2;
1561  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1562  if (temp_sz > 0){
1563  if (temp_sz > 1){
1564  if (temp_sz > 2){
1565  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1566  }
1567  typesz = sizeof(char) * params->buffers_size[0];
1568  typesz2 = sizeof(float) * params->buffers_size[1];
1569  }
1570  else{
1571  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1572  }
1573  }
1574  cl_int error = 0;
1575  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1576  checkError(error);
1577  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1578  checkError(error);
1579  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1580  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1581  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1582 
1583  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1584  checkError (error);
1585  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1586  checkError (error);
1587 
1588  size_t size[2] = {sz, sz2};
1589  size_t work_dimension = 2;
1590 
1591  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1592  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1593  work_dimension--;
1594  }
1595  else if(temp_sz > 0){
1596  if (params->multi_dimensional){
1597  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1598  For default multidimensional global work size, leave the global_work_size vector empty, \
1599  and set multi_dimensional to true. Setting the global work size based on the values inside \
1600  the global_work_size vector.");
1601  }
1602  if (temp_sz == 1){
1603  size[0] = params->global_work_size[0];
1604  work_dimension--;
1605  }
1606  else{
1607  size[0] = params->global_work_size[0];
1608  size[1] = params->global_work_size[1];
1609  if (temp_sz > 2){
1610  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1611  }
1612  }
1613  }
1614 
1615  cl_event gpuExec;
1616  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1617 
1618  clWaitForEvents(1, &gpuExec);
1619 
1620  char *result = (char *) malloc(typesz);
1621  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1622 
1623  v->assign(result, result+sz);
1624 
1625  float *result2 = (float *) malloc(typesz2);
1626  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1627 
1628  v2->assign(result2, result2+sz2);
1629 
1630  clReleaseCommandQueue (queue);
1631  clReleaseMemObject(buffer);
1632  clReleaseMemObject(buffer2);
1633  clReleaseEvent(gpuExec);
1634  free(result);
1635  free(result2);
1636  }
1637 
1638  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
1639  size_t sz = v.size();
1640  size_t sz2 = v2.size();
1641  size_t typesz = sizeof(char) * sz;
1642  size_t typesz2 = sizeof(double) * sz2;
1643  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1644  if (temp_sz > 0){
1645  if (temp_sz > 1){
1646  if (temp_sz > 2){
1647  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1648  }
1649  typesz = sizeof(char) * params->buffers_size[0];
1650  typesz2 = sizeof(double) * params->buffers_size[1];
1651  }
1652  else{
1653  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1654  }
1655  }
1656  cl_int error = 0;
1657  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1658  checkError(error);
1659  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1660  checkError(error);
1661  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1662  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1663  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1664 
1665  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1666  checkError (error);
1667  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1668  checkError (error);
1669 
1670  size_t size[2] = {sz, sz2};
1671  size_t work_dimension = 2;
1672 
1673  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1674  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1675  work_dimension--;
1676  }
1677  else if(temp_sz > 0){
1678  if (params->multi_dimensional){
1679  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1680  For default multidimensional global work size, leave the global_work_size vector empty, \
1681  and set multi_dimensional to true. Setting the global work size based on the values inside \
1682  the global_work_size vector.");
1683  }
1684  if (temp_sz == 1){
1685  size[0] = params->global_work_size[0];
1686  work_dimension--;
1687  }
1688  else{
1689  size[0] = params->global_work_size[0];
1690  size[1] = params->global_work_size[1];
1691  if (temp_sz > 2){
1692  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1693  }
1694  }
1695  }
1696 
1697  cl_event gpuExec;
1698 
1699  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1700 
1701  clWaitForEvents(1, &gpuExec);
1702 
1703  char *result = (char *) malloc(typesz);
1704  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1705 
1706  std::vector<char> res = std::vector<char>();
1707  res.assign(result, result+sz);
1708 
1709  clReleaseCommandQueue (queue);
1710  clReleaseMemObject(buffer);
1711  clReleaseMemObject(buffer2);
1712  clReleaseEvent(gpuExec);
1713  free(result);
1714 
1715  return res;
1716  }
1717 
1718  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
1719  size_t sz = v->size();
1720  size_t sz2 = v2.size();
1721  size_t typesz = sizeof(char) * sz;
1722  size_t typesz2 = sizeof(double) * sz2;
1723  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1724  if (temp_sz > 0){
1725  if (temp_sz > 1){
1726  if (temp_sz > 2){
1727  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1728  }
1729  typesz = sizeof(char) * params->buffers_size[0];
1730  typesz2 = sizeof(double) * params->buffers_size[1];
1731  }
1732  else{
1733  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1734  }
1735  }
1736  cl_int error = 0;
1737  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1738  checkError(error);
1739  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1740  checkError(error);
1741  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1742  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1743  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1744 
1745  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1746  checkError (error);
1747  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1748  checkError (error);
1749 
1750  size_t size[2] = {sz, sz2};
1751  size_t work_dimension = 2;
1752 
1753  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1754  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1755  work_dimension--;
1756  }
1757  else if(temp_sz > 0){
1758  if (params->multi_dimensional){
1759  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1760  For default multidimensional global work size, leave the global_work_size vector empty, \
1761  and set multi_dimensional to true. Setting the global work size based on the values inside \
1762  the global_work_size vector.");
1763  }
1764  if (temp_sz == 1){
1765  size[0] = params->global_work_size[0];
1766  work_dimension--;
1767  }
1768  else{
1769  size[0] = params->global_work_size[0];
1770  size[1] = params->global_work_size[1];
1771  if (temp_sz > 2){
1772  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1773  }
1774  }
1775  }
1776 
1777  cl_event gpuExec;
1778 
1779  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1780 
1781  clWaitForEvents(1, &gpuExec);
1782 
1783  char *result = (char *) malloc(typesz);
1784  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1785 
1786  v->assign(result, result+sz);
1787 
1788  clReleaseCommandQueue (queue);
1789  clReleaseMemObject(buffer);
1790  clReleaseMemObject(buffer2);
1791  clReleaseEvent(gpuExec);
1792  free(result);
1793  }
1794 
1795  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, const ROS_OpenCL_Params* params){
1796  size_t sz = v->size();
1797  size_t sz2 = v2->size();
1798  size_t typesz = sizeof(char) * sz;
1799  size_t typesz2 = sizeof(double) * sz2;
1800  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1801  if (temp_sz > 0){
1802  if (temp_sz > 1){
1803  if (temp_sz > 2){
1804  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1805  }
1806  typesz = sizeof(char) * params->buffers_size[0];
1807  typesz2 = sizeof(double) * params->buffers_size[1];
1808  }
1809  else{
1810  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1811  }
1812  }
1813  cl_int error = 0;
1814  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1815  checkError(error);
1816  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1817  checkError(error);
1818  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1819  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1820  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1821 
1822  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1823  checkError (error);
1824  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1825  checkError (error);
1826 
1827  size_t size[2] = {sz, sz2};
1828  size_t work_dimension = 2;
1829 
1830  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1831  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1832  work_dimension--;
1833  }
1834  else if(temp_sz > 0){
1835  if (params->multi_dimensional){
1836  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1837  For default multidimensional global work size, leave the global_work_size vector empty, \
1838  and set multi_dimensional to true. Setting the global work size based on the values inside \
1839  the global_work_size vector.");
1840  }
1841  if (temp_sz == 1){
1842  size[0] = params->global_work_size[0];
1843  work_dimension--;
1844  }
1845  else{
1846  size[0] = params->global_work_size[0];
1847  size[1] = params->global_work_size[1];
1848  if (temp_sz > 2){
1849  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1850  }
1851  }
1852  }
1853 
1854  cl_event gpuExec;
1855  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1856 
1857  clWaitForEvents(1, &gpuExec);
1858 
1859  char *result = (char *) malloc(typesz);
1860  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1861 
1862  v->assign(result, result+sz);
1863 
1864  double *result2 = (double *) malloc(typesz2);
1865  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1866 
1867  v2->assign(result2, result2+sz2);
1868 
1869  clReleaseCommandQueue (queue);
1870  clReleaseMemObject(buffer);
1871  clReleaseMemObject(buffer2);
1872  clReleaseEvent(gpuExec);
1873  free(result);
1874  free(result2);
1875  }
1876 
1877  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
1878  size_t sz = v.size();
1879  size_t sz2 = v2.size();
1880  size_t typesz = sizeof(int) * sz;
1881  size_t typesz2 = sizeof(char) * sz2;
1882  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1883  if (temp_sz > 0){
1884  if (temp_sz > 1){
1885  if (temp_sz > 2){
1886  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1887  }
1888  typesz = sizeof(int) * params->buffers_size[0];
1889  typesz2 = sizeof(char) * params->buffers_size[1];
1890  }
1891  else{
1892  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1893  }
1894  }
1895  cl_int error = 0;
1896  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1897  checkError(error);
1898  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1899  checkError(error);
1900  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1901  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1902  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1903 
1904  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1905  checkError (error);
1906  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1907  checkError (error);
1908 
1909  size_t size[2] = {sz, sz2};
1910  size_t work_dimension = 2;
1911 
1912  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1913  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1914  work_dimension--;
1915  }
1916  else if(temp_sz > 0){
1917  if (params->multi_dimensional){
1918  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1919  For default multidimensional global work size, leave the global_work_size vector empty, \
1920  and set multi_dimensional to true. Setting the global work size based on the values inside \
1921  the global_work_size vector.");
1922  }
1923  if (temp_sz == 1){
1924  size[0] = params->global_work_size[0];
1925  work_dimension--;
1926  }
1927  else{
1928  size[0] = params->global_work_size[0];
1929  size[1] = params->global_work_size[1];
1930  if (temp_sz > 2){
1931  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1932  }
1933  }
1934  }
1935 
1936  cl_event gpuExec;
1937 
1938  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1939 
1940  clWaitForEvents(1, &gpuExec);
1941 
1942  int *result = (int *) malloc(typesz);
1943  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1944 
1945  std::vector<int> res = std::vector<int>();
1946  res.assign(result, result+sz);
1947 
1948  clReleaseCommandQueue (queue);
1949  clReleaseMemObject(buffer);
1950  clReleaseMemObject(buffer2);
1951  clReleaseEvent(gpuExec);
1952  free(result);
1953 
1954  return res;
1955  }
1956 
1957  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
1958  size_t sz = v->size();
1959  size_t sz2 = v2.size();
1960  size_t typesz = sizeof(int) * sz;
1961  size_t typesz2 = sizeof(char) * sz2;
1962  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
1963  if (temp_sz > 0){
1964  if (temp_sz > 1){
1965  if (temp_sz > 2){
1966  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1967  }
1968  typesz = sizeof(int) * params->buffers_size[0];
1969  typesz2 = sizeof(char) * params->buffers_size[1];
1970  }
1971  else{
1972  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1973  }
1974  }
1975  cl_int error = 0;
1976  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1977  checkError(error);
1978  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1979  checkError(error);
1980  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
1981  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
1982  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
1983 
1984  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1985  checkError (error);
1986  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1987  checkError (error);
1988 
1989  size_t size[2] = {sz, sz2};
1990  size_t work_dimension = 2;
1991 
1992  temp_sz = params != NULL ? params->global_work_size.size() : 0;
1993  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
1994  work_dimension--;
1995  }
1996  else if(temp_sz > 0){
1997  if (params->multi_dimensional){
1998  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
1999  For default multidimensional global work size, leave the global_work_size vector empty, \
2000  and set multi_dimensional to true. Setting the global work size based on the values inside \
2001  the global_work_size vector.");
2002  }
2003  if (temp_sz == 1){
2004  size[0] = params->global_work_size[0];
2005  work_dimension--;
2006  }
2007  else{
2008  size[0] = params->global_work_size[0];
2009  size[1] = params->global_work_size[1];
2010  if (temp_sz > 2){
2011  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2012  }
2013  }
2014  }
2015 
2016  cl_event gpuExec;
2017 
2018  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2019 
2020  clWaitForEvents(1, &gpuExec);
2021 
2022  int *result = (int *) malloc(typesz);
2023  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2024 
2025  v->assign(result, result+sz);
2026 
2027  clReleaseCommandQueue (queue);
2028  clReleaseMemObject(buffer);
2029  clReleaseMemObject(buffer2);
2030  clReleaseEvent(gpuExec);
2031  free(result);
2032  }
2033 
2034  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, const ROS_OpenCL_Params* params){
2035  size_t sz = v->size();
2036  size_t sz2 = v2->size();
2037  size_t typesz = sizeof(int) * sz;
2038  size_t typesz2 = sizeof(char) * sz2;
2039  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2040  if (temp_sz > 0){
2041  if (temp_sz > 1){
2042  if (temp_sz > 2){
2043  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2044  }
2045  typesz = sizeof(int) * params->buffers_size[0];
2046  typesz2 = sizeof(char) * params->buffers_size[1];
2047  }
2048  else{
2049  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2050  }
2051  }
2052  cl_int error = 0;
2053  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2054  checkError(error);
2055  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2056  checkError(error);
2057  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2058  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2059  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2060 
2061  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2062  checkError (error);
2063  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2064  checkError (error);
2065 
2066  size_t size[2] = {sz, sz2};
2067  size_t work_dimension = 2;
2068 
2069  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2070  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2071  work_dimension--;
2072  }
2073  else if(temp_sz > 0){
2074  if (params->multi_dimensional){
2075  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2076  For default multidimensional global work size, leave the global_work_size vector empty, \
2077  and set multi_dimensional to true. Setting the global work size based on the values inside \
2078  the global_work_size vector.");
2079  }
2080  if (temp_sz == 1){
2081  size[0] = params->global_work_size[0];
2082  work_dimension--;
2083  }
2084  else{
2085  size[0] = params->global_work_size[0];
2086  size[1] = params->global_work_size[1];
2087  if (temp_sz > 2){
2088  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2089  }
2090  }
2091  }
2092 
2093  cl_event gpuExec;
2094  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2095 
2096  clWaitForEvents(1, &gpuExec);
2097 
2098  int *result = (int *) malloc(typesz);
2099  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2100 
2101  v->assign(result, result+sz);
2102 
2103  char *result2 = (char *) malloc(typesz2);
2104  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2105 
2106  v2->assign(result2, result2+sz2);
2107 
2108  clReleaseCommandQueue (queue);
2109  clReleaseMemObject(buffer);
2110  clReleaseMemObject(buffer2);
2111  clReleaseEvent(gpuExec);
2112  free(result);
2113  free(result2);
2114  }
2115 
2116  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
2117  size_t sz = v.size();
2118  size_t sz2 = v2.size();
2119  size_t typesz = sizeof(int) * sz;
2120  size_t typesz2 = sizeof(int) * sz2;
2121  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2122  if (temp_sz > 0){
2123  if (temp_sz > 1){
2124  if (temp_sz > 2){
2125  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2126  }
2127  typesz = sizeof(int) * params->buffers_size[0];
2128  typesz2 = sizeof(int) * params->buffers_size[1];
2129  }
2130  else{
2131  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2132  }
2133  }
2134  cl_int error = 0;
2135  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2136  checkError(error);
2137  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2138  checkError(error);
2139  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2140  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2141  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2142 
2143  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2144  checkError (error);
2145  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2146  checkError (error);
2147 
2148  size_t size[2] = {sz, sz2};
2149  size_t work_dimension = 2;
2150 
2151  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2152  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2153  work_dimension--;
2154  }
2155  else if(temp_sz > 0){
2156  if (params->multi_dimensional){
2157  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2158  For default multidimensional global work size, leave the global_work_size vector empty, \
2159  and set multi_dimensional to true. Setting the global work size based on the values inside \
2160  the global_work_size vector.");
2161  }
2162  if (temp_sz == 1){
2163  size[0] = params->global_work_size[0];
2164  work_dimension--;
2165  }
2166  else{
2167  size[0] = params->global_work_size[0];
2168  size[1] = params->global_work_size[1];
2169  if (temp_sz > 2){
2170  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2171  }
2172  }
2173  }
2174 
2175  cl_event gpuExec;
2176 
2177  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2178 
2179  clWaitForEvents(1, &gpuExec);
2180 
2181  int *result = (int *) malloc(typesz);
2182  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2183 
2184  std::vector<int> res = std::vector<int>();
2185  res.assign(result, result+sz);
2186 
2187  clReleaseCommandQueue (queue);
2188  clReleaseMemObject(buffer);
2189  clReleaseMemObject(buffer2);
2190  clReleaseEvent(gpuExec);
2191  free(result);
2192 
2193  return res;
2194  }
2195 
2196  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
2197  size_t sz = v->size();
2198  size_t sz2 = v2.size();
2199  size_t typesz = sizeof(int) * sz;
2200  size_t typesz2 = sizeof(int) * sz2;
2201  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2202  if (temp_sz > 0){
2203  if (temp_sz > 1){
2204  if (temp_sz > 2){
2205  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2206  }
2207  typesz = sizeof(int) * params->buffers_size[0];
2208  typesz2 = sizeof(int) * params->buffers_size[1];
2209  }
2210  else{
2211  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2212  }
2213  }
2214  cl_int error = 0;
2215  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2216  checkError(error);
2217  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2218  checkError(error);
2219  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2220  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2221  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2222 
2223  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2224  checkError (error);
2225  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2226  checkError (error);
2227 
2228  size_t size[2] = {sz, sz2};
2229  size_t work_dimension = 2;
2230 
2231  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2232  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2233  work_dimension--;
2234  }
2235  else if(temp_sz > 0){
2236  if (params->multi_dimensional){
2237  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2238  For default multidimensional global work size, leave the global_work_size vector empty, \
2239  and set multi_dimensional to true. Setting the global work size based on the values inside \
2240  the global_work_size vector.");
2241  }
2242  if (temp_sz == 1){
2243  size[0] = params->global_work_size[0];
2244  work_dimension--;
2245  }
2246  else{
2247  size[0] = params->global_work_size[0];
2248  size[1] = params->global_work_size[1];
2249  if (temp_sz > 2){
2250  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2251  }
2252  }
2253  }
2254 
2255  cl_event gpuExec;
2256 
2257  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2258 
2259  clWaitForEvents(1, &gpuExec);
2260 
2261  int *result = (int *) malloc(typesz);
2262  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2263 
2264  v->assign(result, result+sz);
2265 
2266  clReleaseCommandQueue (queue);
2267  clReleaseMemObject(buffer);
2268  clReleaseMemObject(buffer2);
2269  clReleaseEvent(gpuExec);
2270  free(result);
2271  }
2272 
2273  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, const ROS_OpenCL_Params* params){
2274  size_t sz = v->size();
2275  size_t sz2 = v2->size();
2276  size_t typesz = sizeof(int) * sz;
2277  size_t typesz2 = sizeof(int) * sz2;
2278  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2279  if (temp_sz > 0){
2280  if (temp_sz > 1){
2281  if (temp_sz > 2){
2282  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2283  }
2284  typesz = sizeof(int) * params->buffers_size[0];
2285  typesz2 = sizeof(int) * params->buffers_size[1];
2286  }
2287  else{
2288  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2289  }
2290  }
2291  cl_int error = 0;
2292  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2293  checkError(error);
2294  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2295  checkError(error);
2296  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2297  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2298  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2299 
2300  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2301  checkError (error);
2302  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2303  checkError (error);
2304 
2305  size_t size[2] = {sz, sz2};
2306  size_t work_dimension = 2;
2307 
2308  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2309  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2310  work_dimension--;
2311  }
2312  else if(temp_sz > 0){
2313  if (params->multi_dimensional){
2314  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2315  For default multidimensional global work size, leave the global_work_size vector empty, \
2316  and set multi_dimensional to true. Setting the global work size based on the values inside \
2317  the global_work_size vector.");
2318  }
2319  if (temp_sz == 1){
2320  size[0] = params->global_work_size[0];
2321  work_dimension--;
2322  }
2323  else{
2324  size[0] = params->global_work_size[0];
2325  size[1] = params->global_work_size[1];
2326  if (temp_sz > 2){
2327  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2328  }
2329  }
2330  }
2331 
2332  cl_event gpuExec;
2333  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2334 
2335  clWaitForEvents(1, &gpuExec);
2336 
2337  int *result = (int *) malloc(typesz);
2338  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2339 
2340  v->assign(result, result+sz);
2341 
2342  if (typesz2 != typesz or sz != sz2){
2343  int *result2;
2344  result2 = (int *) malloc(typesz2);
2345  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2346 
2347  v2->assign(result2, result2+sz2);
2348  free(result2);
2349  }
2350  else{
2351  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
2352 
2353  v2->assign(result, result+sz2);
2354  }
2355 
2356  clReleaseCommandQueue (queue);
2357  clReleaseMemObject(buffer);
2358  clReleaseMemObject(buffer2);
2359  clReleaseEvent(gpuExec);
2360  free(result);
2361  }
2362 
2363  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
2364  size_t sz = v.size();
2365  size_t sz2 = v2.size();
2366  size_t typesz = sizeof(int) * sz;
2367  size_t typesz2 = sizeof(float) * sz2;
2368  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2369  if (temp_sz > 0){
2370  if (temp_sz > 1){
2371  if (temp_sz > 2){
2372  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2373  }
2374  typesz = sizeof(int) * params->buffers_size[0];
2375  typesz2 = sizeof(float) * params->buffers_size[1];
2376  }
2377  else{
2378  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2379  }
2380  }
2381  cl_int error = 0;
2382  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2383  checkError(error);
2384  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2385  checkError(error);
2386  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2387  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2388  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2389 
2390  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2391  checkError (error);
2392  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2393  checkError (error);
2394 
2395  size_t size[2] = {sz, sz2};
2396  size_t work_dimension = 2;
2397 
2398  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2399  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2400  work_dimension--;
2401  }
2402  else if(temp_sz > 0){
2403  if (params->multi_dimensional){
2404  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2405  For default multidimensional global work size, leave the global_work_size vector empty, \
2406  and set multi_dimensional to true. Setting the global work size based on the values inside \
2407  the global_work_size vector.");
2408  }
2409  if (temp_sz == 1){
2410  size[0] = params->global_work_size[0];
2411  work_dimension--;
2412  }
2413  else{
2414  size[0] = params->global_work_size[0];
2415  size[1] = params->global_work_size[1];
2416  if (temp_sz > 2){
2417  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2418  }
2419  }
2420  }
2421 
2422  cl_event gpuExec;
2423 
2424  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2425 
2426  clWaitForEvents(1, &gpuExec);
2427 
2428  int *result = (int *) malloc(typesz);
2429  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2430 
2431  std::vector<int> res = std::vector<int>();
2432  res.assign(result, result+sz);
2433 
2434  clReleaseCommandQueue (queue);
2435  clReleaseMemObject(buffer);
2436  clReleaseMemObject(buffer2);
2437  clReleaseEvent(gpuExec);
2438  free(result);
2439 
2440  return res;
2441  }
2442 
2443  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
2444  size_t sz = v->size();
2445  size_t sz2 = v2.size();
2446  size_t typesz = sizeof(int) * sz;
2447  size_t typesz2 = sizeof(float) * sz2;
2448  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2449  if (temp_sz > 0){
2450  if (temp_sz > 1){
2451  if (temp_sz > 2){
2452  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2453  }
2454  typesz = sizeof(int) * params->buffers_size[0];
2455  typesz2 = sizeof(float) * params->buffers_size[1];
2456  }
2457  else{
2458  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2459  }
2460  }
2461  cl_int error = 0;
2462  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2463  checkError(error);
2464  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2465  checkError(error);
2466  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2467  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2468  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2469 
2470  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2471  checkError (error);
2472  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2473  checkError (error);
2474 
2475  size_t size[2] = {sz, sz2};
2476  size_t work_dimension = 2;
2477 
2478  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2479  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2480  work_dimension--;
2481  }
2482  else if(temp_sz > 0){
2483  if (params->multi_dimensional){
2484  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2485  For default multidimensional global work size, leave the global_work_size vector empty, \
2486  and set multi_dimensional to true. Setting the global work size based on the values inside \
2487  the global_work_size vector.");
2488  }
2489  if (temp_sz == 1){
2490  size[0] = params->global_work_size[0];
2491  work_dimension--;
2492  }
2493  else{
2494  size[0] = params->global_work_size[0];
2495  size[1] = params->global_work_size[1];
2496  if (temp_sz > 2){
2497  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2498  }
2499  }
2500  }
2501 
2502  cl_event gpuExec;
2503 
2504  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2505 
2506  clWaitForEvents(1, &gpuExec);
2507 
2508  int *result = (int *) malloc(typesz);
2509  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2510 
2511  v->assign(result, result+sz);
2512 
2513  clReleaseCommandQueue (queue);
2514  clReleaseMemObject(buffer);
2515  clReleaseMemObject(buffer2);
2516  clReleaseEvent(gpuExec);
2517  free(result);
2518  }
2519 
2520  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, const ROS_OpenCL_Params* params){
2521  size_t sz = v->size();
2522  size_t sz2 = v2->size();
2523  size_t typesz = sizeof(int) * sz;
2524  size_t typesz2 = sizeof(float) * sz2;
2525  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2526  if (temp_sz > 0){
2527  if (temp_sz > 1){
2528  if (temp_sz > 2){
2529  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2530  }
2531  typesz = sizeof(int) * params->buffers_size[0];
2532  typesz2 = sizeof(float) * params->buffers_size[1];
2533  }
2534  else{
2535  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2536  }
2537  }
2538  cl_int error = 0;
2539  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2540  checkError(error);
2541  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2542  checkError(error);
2543  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2544  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2545  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2546 
2547  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2548  checkError (error);
2549  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2550  checkError (error);
2551 
2552  size_t size[2] = {sz, sz2};
2553  size_t work_dimension = 2;
2554 
2555  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2556  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2557  work_dimension--;
2558  }
2559  else if(temp_sz > 0){
2560  if (params->multi_dimensional){
2561  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2562  For default multidimensional global work size, leave the global_work_size vector empty, \
2563  and set multi_dimensional to true. Setting the global work size based on the values inside \
2564  the global_work_size vector.");
2565  }
2566  if (temp_sz == 1){
2567  size[0] = params->global_work_size[0];
2568  work_dimension--;
2569  }
2570  else{
2571  size[0] = params->global_work_size[0];
2572  size[1] = params->global_work_size[1];
2573  if (temp_sz > 2){
2574  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2575  }
2576  }
2577  }
2578 
2579  cl_event gpuExec;
2580  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2581 
2582  clWaitForEvents(1, &gpuExec);
2583 
2584  int *result = (int *) malloc(typesz);
2585  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2586 
2587  v->assign(result, result+sz);
2588 
2589  float *result2 = (float *) malloc(typesz2);
2590  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2591 
2592  v2->assign(result2, result2+sz2);
2593 
2594  clReleaseCommandQueue (queue);
2595  clReleaseMemObject(buffer);
2596  clReleaseMemObject(buffer2);
2597  clReleaseEvent(gpuExec);
2598  free(result);
2599  free(result2);
2600  }
2601 
2602  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
2603  size_t sz = v.size();
2604  size_t sz2 = v2.size();
2605  size_t typesz = sizeof(int) * sz;
2606  size_t typesz2 = sizeof(double) * sz2;
2607  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2608  if (temp_sz > 0){
2609  if (temp_sz > 1){
2610  if (temp_sz > 2){
2611  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2612  }
2613  typesz = sizeof(int) * params->buffers_size[0];
2614  typesz2 = sizeof(double) * params->buffers_size[1];
2615  }
2616  else{
2617  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2618  }
2619  }
2620  cl_int error = 0;
2621  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2622  checkError(error);
2623  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2624  checkError(error);
2625  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2626  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2627  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2628 
2629  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2630  checkError (error);
2631  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2632  checkError (error);
2633 
2634  size_t size[2] = {sz, sz2};
2635  size_t work_dimension = 2;
2636 
2637  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2638  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2639  work_dimension--;
2640  }
2641  else if(temp_sz > 0){
2642  if (params->multi_dimensional){
2643  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2644  For default multidimensional global work size, leave the global_work_size vector empty, \
2645  and set multi_dimensional to true. Setting the global work size based on the values inside \
2646  the global_work_size vector.");
2647  }
2648  if (temp_sz == 1){
2649  size[0] = params->global_work_size[0];
2650  work_dimension--;
2651  }
2652  else{
2653  size[0] = params->global_work_size[0];
2654  size[1] = params->global_work_size[1];
2655  if (temp_sz > 2){
2656  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2657  }
2658  }
2659  }
2660 
2661  cl_event gpuExec;
2662 
2663  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2664 
2665  clWaitForEvents(1, &gpuExec);
2666 
2667  int *result = (int *) malloc(typesz);
2668  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2669 
2670  std::vector<int> res = std::vector<int>();
2671  res.assign(result, result+sz);
2672 
2673  clReleaseCommandQueue (queue);
2674  clReleaseMemObject(buffer);
2675  clReleaseMemObject(buffer2);
2676  clReleaseEvent(gpuExec);
2677  free(result);
2678 
2679  return res;
2680  }
2681 
2682  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
2683  size_t sz = v->size();
2684  size_t sz2 = v2.size();
2685  size_t typesz = sizeof(int) * sz;
2686  size_t typesz2 = sizeof(double) * sz2;
2687  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2688  if (temp_sz > 0){
2689  if (temp_sz > 1){
2690  if (temp_sz > 2){
2691  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2692  }
2693  typesz = sizeof(int) * params->buffers_size[0];
2694  typesz2 = sizeof(double) * params->buffers_size[1];
2695  }
2696  else{
2697  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2698  }
2699  }
2700  cl_int error = 0;
2701  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2702  checkError(error);
2703  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2704  checkError(error);
2705  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2706  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2707  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2708 
2709  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2710  checkError (error);
2711  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2712  checkError (error);
2713 
2714  size_t size[2] = {sz, sz2};
2715  size_t work_dimension = 2;
2716 
2717  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2718  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2719  work_dimension--;
2720  }
2721  else if(temp_sz > 0){
2722  if (params->multi_dimensional){
2723  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2724  For default multidimensional global work size, leave the global_work_size vector empty, \
2725  and set multi_dimensional to true. Setting the global work size based on the values inside \
2726  the global_work_size vector.");
2727  }
2728  if (temp_sz == 1){
2729  size[0] = params->global_work_size[0];
2730  work_dimension--;
2731  }
2732  else{
2733  size[0] = params->global_work_size[0];
2734  size[1] = params->global_work_size[1];
2735  if (temp_sz > 2){
2736  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2737  }
2738  }
2739  }
2740 
2741  cl_event gpuExec;
2742 
2743  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2744 
2745  clWaitForEvents(1, &gpuExec);
2746 
2747  int *result = (int *) malloc(typesz);
2748  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2749 
2750  v->assign(result, result+sz);
2751 
2752  clReleaseCommandQueue (queue);
2753  clReleaseMemObject(buffer);
2754  clReleaseMemObject(buffer2);
2755  clReleaseEvent(gpuExec);
2756  free(result);
2757  }
2758 
2759  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, const ROS_OpenCL_Params* params){
2760  size_t sz = v->size();
2761  size_t sz2 = v2->size();
2762  size_t typesz = sizeof(int) * sz;
2763  size_t typesz2 = sizeof(double) * sz2;
2764  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2765  if (temp_sz > 0){
2766  if (temp_sz > 1){
2767  if (temp_sz > 2){
2768  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2769  }
2770  typesz = sizeof(int) * params->buffers_size[0];
2771  typesz2 = sizeof(double) * params->buffers_size[1];
2772  }
2773  else{
2774  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2775  }
2776  }
2777  cl_int error = 0;
2778  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2779  checkError(error);
2780  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2781  checkError(error);
2782  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2783  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2784  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2785 
2786  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2787  checkError (error);
2788  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2789  checkError (error);
2790 
2791  size_t size[2] = {sz, sz2};
2792  size_t work_dimension = 2;
2793 
2794  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2795  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2796  work_dimension--;
2797  }
2798  else if(temp_sz > 0){
2799  if (params->multi_dimensional){
2800  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2801  For default multidimensional global work size, leave the global_work_size vector empty, \
2802  and set multi_dimensional to true. Setting the global work size based on the values inside \
2803  the global_work_size vector.");
2804  }
2805  if (temp_sz == 1){
2806  size[0] = params->global_work_size[0];
2807  work_dimension--;
2808  }
2809  else{
2810  size[0] = params->global_work_size[0];
2811  size[1] = params->global_work_size[1];
2812  if (temp_sz > 2){
2813  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2814  }
2815  }
2816  }
2817 
2818  cl_event gpuExec;
2819  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2820 
2821  clWaitForEvents(1, &gpuExec);
2822 
2823  int *result = (int *) malloc(typesz);
2824  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2825 
2826  v->assign(result, result+sz);
2827 
2828  double *result2 = (double *) malloc(typesz2);
2829  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2830 
2831  v2->assign(result2, result2+sz2);
2832 
2833  clReleaseCommandQueue (queue);
2834  clReleaseMemObject(buffer);
2835  clReleaseMemObject(buffer2);
2836  clReleaseEvent(gpuExec);
2837  free(result);
2838  free(result2);
2839  }
2840 
2841  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
2842  size_t sz = v.size();
2843  size_t sz2 = v2.size();
2844  size_t typesz = sizeof(float) * sz;
2845  size_t typesz2 = sizeof(char) * sz2;
2846  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2847  if (temp_sz > 0){
2848  if (temp_sz > 1){
2849  if (temp_sz > 2){
2850  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2851  }
2852  typesz = sizeof(float) * params->buffers_size[0];
2853  typesz2 = sizeof(char) * params->buffers_size[1];
2854  }
2855  else{
2856  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2857  }
2858  }
2859  cl_int error = 0;
2860  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2861  checkError(error);
2862  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2863  checkError(error);
2864  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2865  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2866  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2867 
2868  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2869  checkError (error);
2870  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2871  checkError (error);
2872 
2873  size_t size[2] = {sz, sz2};
2874  size_t work_dimension = 2;
2875 
2876  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2877  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2878  work_dimension--;
2879  }
2880  else if(temp_sz > 0){
2881  if (params->multi_dimensional){
2882  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2883  For default multidimensional global work size, leave the global_work_size vector empty, \
2884  and set multi_dimensional to true. Setting the global work size based on the values inside \
2885  the global_work_size vector.");
2886  }
2887  if (temp_sz == 1){
2888  size[0] = params->global_work_size[0];
2889  work_dimension--;
2890  }
2891  else{
2892  size[0] = params->global_work_size[0];
2893  size[1] = params->global_work_size[1];
2894  if (temp_sz > 2){
2895  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2896  }
2897  }
2898  }
2899 
2900  cl_event gpuExec;
2901 
2902  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2903 
2904  clWaitForEvents(1, &gpuExec);
2905 
2906  float *result = (float *) malloc(typesz);
2907  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2908 
2909  std::vector<float> res = std::vector<float>();
2910  res.assign(result, result+sz);
2911 
2912  clReleaseCommandQueue (queue);
2913  clReleaseMemObject(buffer);
2914  clReleaseMemObject(buffer2);
2915  clReleaseEvent(gpuExec);
2916  free(result);
2917 
2918  return res;
2919  }
2920 
2921  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
2922  size_t sz = v->size();
2923  size_t sz2 = v2.size();
2924  size_t typesz = sizeof(float) * sz;
2925  size_t typesz2 = sizeof(char) * sz2;
2926  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
2927  if (temp_sz > 0){
2928  if (temp_sz > 1){
2929  if (temp_sz > 2){
2930  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2931  }
2932  typesz = sizeof(float) * params->buffers_size[0];
2933  typesz2 = sizeof(char) * params->buffers_size[1];
2934  }
2935  else{
2936  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2937  }
2938  }
2939  cl_int error = 0;
2940  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2941  checkError(error);
2942  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2943  checkError(error);
2944  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
2945  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
2946  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
2947 
2948  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2949  checkError (error);
2950  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2951  checkError (error);
2952 
2953  size_t size[2] = {sz, sz2};
2954  size_t work_dimension = 2;
2955 
2956  temp_sz = params != NULL ? params->global_work_size.size() : 0;
2957  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
2958  work_dimension--;
2959  }
2960  else if(temp_sz > 0){
2961  if (params->multi_dimensional){
2962  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
2963  For default multidimensional global work size, leave the global_work_size vector empty, \
2964  and set multi_dimensional to true. Setting the global work size based on the values inside \
2965  the global_work_size vector.");
2966  }
2967  if (temp_sz == 1){
2968  size[0] = params->global_work_size[0];
2969  work_dimension--;
2970  }
2971  else{
2972  size[0] = params->global_work_size[0];
2973  size[1] = params->global_work_size[1];
2974  if (temp_sz > 2){
2975  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2976  }
2977  }
2978  }
2979 
2980  cl_event gpuExec;
2981 
2982  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2983 
2984  clWaitForEvents(1, &gpuExec);
2985 
2986  float *result = (float *) malloc(typesz);
2987  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2988 
2989  v->assign(result, result+sz);
2990 
2991  clReleaseCommandQueue (queue);
2992  clReleaseMemObject(buffer);
2993  clReleaseMemObject(buffer2);
2994  clReleaseEvent(gpuExec);
2995  free(result);
2996  }
2997 
2998  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, const ROS_OpenCL_Params* params){
2999  size_t sz = v->size();
3000  size_t sz2 = v2->size();
3001  size_t typesz = sizeof(float) * sz;
3002  size_t typesz2 = sizeof(char) * sz2;
3003  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3004  if (temp_sz > 0){
3005  if (temp_sz > 1){
3006  if (temp_sz > 2){
3007  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3008  }
3009  typesz = sizeof(float) * params->buffers_size[0];
3010  typesz2 = sizeof(char) * params->buffers_size[1];
3011  }
3012  else{
3013  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3014  }
3015  }
3016  cl_int error = 0;
3017  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3018  checkError(error);
3019  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3020  checkError(error);
3021  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3022  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3023  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3024 
3025  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3026  checkError (error);
3027  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3028  checkError (error);
3029 
3030  size_t size[2] = {sz, sz2};
3031  size_t work_dimension = 2;
3032 
3033  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3034  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3035  work_dimension--;
3036  }
3037  else if(temp_sz > 0){
3038  if (params->multi_dimensional){
3039  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3040  For default multidimensional global work size, leave the global_work_size vector empty, \
3041  and set multi_dimensional to true. Setting the global work size based on the values inside \
3042  the global_work_size vector.");
3043  }
3044  if (temp_sz == 1){
3045  size[0] = params->global_work_size[0];
3046  work_dimension--;
3047  }
3048  else{
3049  size[0] = params->global_work_size[0];
3050  size[1] = params->global_work_size[1];
3051  if (temp_sz > 2){
3052  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3053  }
3054  }
3055  }
3056 
3057  cl_event gpuExec;
3058  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3059 
3060  clWaitForEvents(1, &gpuExec);
3061 
3062  float *result = (float *) malloc(typesz);
3063  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3064 
3065  v->assign(result, result+sz);
3066 
3067  char *result2 = (char *) malloc(typesz2);
3068  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3069 
3070  v2->assign(result2, result2+sz2);
3071 
3072  clReleaseCommandQueue (queue);
3073  clReleaseMemObject(buffer);
3074  clReleaseMemObject(buffer2);
3075  clReleaseEvent(gpuExec);
3076  free(result);
3077  free(result2);
3078  }
3079 
3080  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
3081  size_t sz = v.size();
3082  size_t sz2 = v2.size();
3083  size_t typesz = sizeof(float) * sz;
3084  size_t typesz2 = sizeof(int) * sz2;
3085  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3086  if (temp_sz > 0){
3087  if (temp_sz > 1){
3088  if (temp_sz > 2){
3089  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3090  }
3091  typesz = sizeof(float) * params->buffers_size[0];
3092  typesz2 = sizeof(int) * params->buffers_size[1];
3093  }
3094  else{
3095  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3096  }
3097  }
3098  cl_int error = 0;
3099  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3100  checkError(error);
3101  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3102  checkError(error);
3103  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3104  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3105  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3106 
3107  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3108  checkError (error);
3109  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3110  checkError (error);
3111 
3112  size_t size[2] = {sz, sz2};
3113  size_t work_dimension = 2;
3114 
3115  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3116  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3117  work_dimension--;
3118  }
3119  else if(temp_sz > 0){
3120  if (params->multi_dimensional){
3121  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3122  For default multidimensional global work size, leave the global_work_size vector empty, \
3123  and set multi_dimensional to true. Setting the global work size based on the values inside \
3124  the global_work_size vector.");
3125  }
3126  if (temp_sz == 1){
3127  size[0] = params->global_work_size[0];
3128  work_dimension--;
3129  }
3130  else{
3131  size[0] = params->global_work_size[0];
3132  size[1] = params->global_work_size[1];
3133  if (temp_sz > 2){
3134  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3135  }
3136  }
3137  }
3138 
3139  cl_event gpuExec;
3140 
3141  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3142 
3143  clWaitForEvents(1, &gpuExec);
3144 
3145  float *result = (float *) malloc(typesz);
3146  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3147 
3148  std::vector<float> res = std::vector<float>();
3149  res.assign(result, result+sz);
3150 
3151  clReleaseCommandQueue (queue);
3152  clReleaseMemObject(buffer);
3153  clReleaseMemObject(buffer2);
3154  clReleaseEvent(gpuExec);
3155  free(result);
3156 
3157  return res;
3158  }
3159 
3160  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
3161  size_t sz = v->size();
3162  size_t sz2 = v2.size();
3163  size_t typesz = sizeof(float) * sz;
3164  size_t typesz2 = sizeof(int) * sz2;
3165  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3166  if (temp_sz > 0){
3167  if (temp_sz > 1){
3168  if (temp_sz > 2){
3169  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3170  }
3171  typesz = sizeof(float) * params->buffers_size[0];
3172  typesz2 = sizeof(int) * params->buffers_size[1];
3173  }
3174  else{
3175  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3176  }
3177  }
3178  cl_int error = 0;
3179  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3180  checkError(error);
3181  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3182  checkError(error);
3183  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3184  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3185  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3186 
3187  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3188  checkError (error);
3189  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3190  checkError (error);
3191 
3192  size_t size[2] = {sz, sz2};
3193  size_t work_dimension = 2;
3194 
3195  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3196  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3197  work_dimension--;
3198  }
3199  else if(temp_sz > 0){
3200  if (params->multi_dimensional){
3201  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3202  For default multidimensional global work size, leave the global_work_size vector empty, \
3203  and set multi_dimensional to true. Setting the global work size based on the values inside \
3204  the global_work_size vector.");
3205  }
3206  if (temp_sz == 1){
3207  size[0] = params->global_work_size[0];
3208  work_dimension--;
3209  }
3210  else{
3211  size[0] = params->global_work_size[0];
3212  size[1] = params->global_work_size[1];
3213  if (temp_sz > 2){
3214  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3215  }
3216  }
3217  }
3218 
3219  cl_event gpuExec;
3220 
3221  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3222 
3223  clWaitForEvents(1, &gpuExec);
3224 
3225  float *result = (float *) malloc(typesz);
3226  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3227 
3228  v->assign(result, result+sz);
3229 
3230  clReleaseCommandQueue (queue);
3231  clReleaseMemObject(buffer);
3232  clReleaseMemObject(buffer2);
3233  clReleaseEvent(gpuExec);
3234  free(result);
3235  }
3236 
3237  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, const ROS_OpenCL_Params* params){
3238  size_t sz = v->size();
3239  size_t sz2 = v2->size();
3240  size_t typesz = sizeof(float) * sz;
3241  size_t typesz2 = sizeof(int) * sz2;
3242  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3243  if (temp_sz > 0){
3244  if (temp_sz > 1){
3245  if (temp_sz > 2){
3246  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3247  }
3248  typesz = sizeof(float) * params->buffers_size[0];
3249  typesz2 = sizeof(int) * params->buffers_size[1];
3250  }
3251  else{
3252  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3253  }
3254  }
3255  cl_int error = 0;
3256  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3257  checkError(error);
3258  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3259  checkError(error);
3260  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3261  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3262  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3263 
3264  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3265  checkError (error);
3266  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3267  checkError (error);
3268 
3269  size_t size[2] = {sz, sz2};
3270  size_t work_dimension = 2;
3271 
3272  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3273  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3274  work_dimension--;
3275  }
3276  else if(temp_sz > 0){
3277  if (params->multi_dimensional){
3278  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3279  For default multidimensional global work size, leave the global_work_size vector empty, \
3280  and set multi_dimensional to true. Setting the global work size based on the values inside \
3281  the global_work_size vector.");
3282  }
3283  if (temp_sz == 1){
3284  size[0] = params->global_work_size[0];
3285  work_dimension--;
3286  }
3287  else{
3288  size[0] = params->global_work_size[0];
3289  size[1] = params->global_work_size[1];
3290  if (temp_sz > 2){
3291  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3292  }
3293  }
3294  }
3295 
3296  cl_event gpuExec;
3297  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3298 
3299  clWaitForEvents(1, &gpuExec);
3300 
3301  float *result = (float *) malloc(typesz);
3302  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3303 
3304  v->assign(result, result+sz);
3305 
3306  int *result2 = (int *) malloc(typesz2);
3307  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3308 
3309  v2->assign(result2, result2+sz2);
3310 
3311  clReleaseCommandQueue (queue);
3312  clReleaseMemObject(buffer);
3313  clReleaseMemObject(buffer2);
3314  clReleaseEvent(gpuExec);
3315  free(result);
3316  free(result2);
3317  }
3318 
3319  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
3320  size_t sz = v.size();
3321  size_t sz2 = v2.size();
3322  size_t typesz = sizeof(float) * sz;
3323  size_t typesz2 = sizeof(float) * sz2;
3324  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3325  if (temp_sz > 0){
3326  if (temp_sz > 1){
3327  if (temp_sz > 2){
3328  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3329  }
3330  typesz = sizeof(float) * params->buffers_size[0];
3331  typesz2 = sizeof(float) * params->buffers_size[1];
3332  }
3333  else{
3334  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3335  }
3336  }
3337  cl_int error = 0;
3338  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3339  checkError(error);
3340  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3341  checkError(error);
3342  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3343  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3344  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3345 
3346  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3347  checkError (error);
3348  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3349  checkError (error);
3350 
3351  size_t size[2] = {sz, sz2};
3352  size_t work_dimension = 2;
3353 
3354  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3355  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3356  work_dimension--;
3357  }
3358  else if(temp_sz > 0){
3359  if (params->multi_dimensional){
3360  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3361  For default multidimensional global work size, leave the global_work_size vector empty, \
3362  and set multi_dimensional to true. Setting the global work size based on the values inside \
3363  the global_work_size vector.");
3364  }
3365  if (temp_sz == 1){
3366  size[0] = params->global_work_size[0];
3367  work_dimension--;
3368  }
3369  else{
3370  size[0] = params->global_work_size[0];
3371  size[1] = params->global_work_size[1];
3372  if (temp_sz > 2){
3373  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3374  }
3375  }
3376  }
3377 
3378  cl_event gpuExec;
3379 
3380  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3381 
3382  clWaitForEvents(1, &gpuExec);
3383 
3384  float *result = (float *) malloc(typesz);
3385  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3386 
3387  std::vector<float> res = std::vector<float>();
3388  res.assign(result, result+sz);
3389 
3390  clReleaseCommandQueue (queue);
3391  clReleaseMemObject(buffer);
3392  clReleaseMemObject(buffer2);
3393  clReleaseEvent(gpuExec);
3394  free(result);
3395 
3396  return res;
3397  }
3398 
3399  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
3400  size_t sz = v->size();
3401  size_t sz2 = v2.size();
3402  size_t typesz = sizeof(float) * sz;
3403  size_t typesz2 = sizeof(float) * sz2;
3404  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3405  if (temp_sz > 0){
3406  if (temp_sz > 1){
3407  if (temp_sz > 2){
3408  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3409  }
3410  typesz = sizeof(float) * params->buffers_size[0];
3411  typesz2 = sizeof(float) * params->buffers_size[1];
3412  }
3413  else{
3414  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3415  }
3416  }
3417  cl_int error = 0;
3418  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3419  checkError(error);
3420  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3421  checkError(error);
3422  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3423  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3424  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3425 
3426  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3427  checkError (error);
3428  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3429  checkError (error);
3430 
3431  size_t size[2] = {sz, sz2};
3432  size_t work_dimension = 2;
3433 
3434  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3435  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3436  work_dimension--;
3437  }
3438  else if(temp_sz > 0){
3439  if (params->multi_dimensional){
3440  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3441  For default multidimensional global work size, leave the global_work_size vector empty, \
3442  and set multi_dimensional to true. Setting the global work size based on the values inside \
3443  the global_work_size vector.");
3444  }
3445  if (temp_sz == 1){
3446  size[0] = params->global_work_size[0];
3447  work_dimension--;
3448  }
3449  else{
3450  size[0] = params->global_work_size[0];
3451  size[1] = params->global_work_size[1];
3452  if (temp_sz > 2){
3453  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3454  }
3455  }
3456  }
3457 
3458  cl_event gpuExec;
3459 
3460  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3461 
3462  clWaitForEvents(1, &gpuExec);
3463 
3464  float *result = (float *) malloc(typesz);
3465  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3466 
3467  v->assign(result, result+sz);
3468 
3469  clReleaseCommandQueue (queue);
3470  clReleaseMemObject(buffer);
3471  clReleaseMemObject(buffer2);
3472  clReleaseEvent(gpuExec);
3473  free(result);
3474  }
3475 
3476  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, const ROS_OpenCL_Params* params){
3477  size_t sz = v->size();
3478  size_t sz2 = v2->size();
3479  size_t typesz = sizeof(float) * sz;
3480  size_t typesz2 = sizeof(float) * sz2;
3481  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3482  if (temp_sz > 0){
3483  if (temp_sz > 1){
3484  if (temp_sz > 2){
3485  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3486  }
3487  typesz = sizeof(float) * params->buffers_size[0];
3488  typesz2 = sizeof(float) * params->buffers_size[1];
3489  }
3490  else{
3491  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3492  }
3493  }
3494  cl_int error = 0;
3495  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3496  checkError(error);
3497  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3498  checkError(error);
3499  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3500  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3501  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3502 
3503  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3504  checkError (error);
3505  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3506  checkError (error);
3507 
3508  size_t size[2] = {sz, sz2};
3509  size_t work_dimension = 2;
3510 
3511  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3512  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3513  work_dimension--;
3514  }
3515  else if(temp_sz > 0){
3516  if (params->multi_dimensional){
3517  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3518  For default multidimensional global work size, leave the global_work_size vector empty, \
3519  and set multi_dimensional to true. Setting the global work size based on the values inside \
3520  the global_work_size vector.");
3521  }
3522  if (temp_sz == 1){
3523  size[0] = params->global_work_size[0];
3524  work_dimension--;
3525  }
3526  else{
3527  size[0] = params->global_work_size[0];
3528  size[1] = params->global_work_size[1];
3529  if (temp_sz > 2){
3530  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3531  }
3532  }
3533  }
3534 
3535  cl_event gpuExec;
3536  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3537 
3538  clWaitForEvents(1, &gpuExec);
3539 
3540  float *result = (float *) malloc(typesz);
3541  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3542 
3543  v->assign(result, result+sz);
3544 
3545  if (typesz2 != typesz or sz != sz2){
3546  float *result2;
3547  result2 = (float *) malloc(typesz2);
3548  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3549 
3550  v2->assign(result2, result2+sz2);
3551  free(result2);
3552  }
3553  else{
3554  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
3555 
3556  v2->assign(result, result+sz2);
3557  }
3558 
3559  clReleaseCommandQueue (queue);
3560  clReleaseMemObject(buffer);
3561  clReleaseMemObject(buffer2);
3562  clReleaseEvent(gpuExec);
3563  free(result);
3564  }
3565 
3566  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
3567  size_t sz = v.size();
3568  size_t sz2 = v2.size();
3569  size_t typesz = sizeof(float) * sz;
3570  size_t typesz2 = sizeof(double) * sz2;
3571  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3572  if (temp_sz > 0){
3573  if (temp_sz > 1){
3574  if (temp_sz > 2){
3575  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3576  }
3577  typesz = sizeof(float) * params->buffers_size[0];
3578  typesz2 = sizeof(double) * params->buffers_size[1];
3579  }
3580  else{
3581  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3582  }
3583  }
3584  cl_int error = 0;
3585  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3586  checkError(error);
3587  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3588  checkError(error);
3589  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3590  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3591  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3592 
3593  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3594  checkError (error);
3595  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3596  checkError (error);
3597 
3598  size_t size[2] = {sz, sz2};
3599  size_t work_dimension = 2;
3600 
3601  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3602  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3603  work_dimension--;
3604  }
3605  else if(temp_sz > 0){
3606  if (params->multi_dimensional){
3607  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3608  For default multidimensional global work size, leave the global_work_size vector empty, \
3609  and set multi_dimensional to true. Setting the global work size based on the values inside \
3610  the global_work_size vector.");
3611  }
3612  if (temp_sz == 1){
3613  size[0] = params->global_work_size[0];
3614  work_dimension--;
3615  }
3616  else{
3617  size[0] = params->global_work_size[0];
3618  size[1] = params->global_work_size[1];
3619  if (temp_sz > 2){
3620  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3621  }
3622  }
3623  }
3624 
3625  cl_event gpuExec;
3626 
3627  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3628 
3629  clWaitForEvents(1, &gpuExec);
3630 
3631  float *result = (float *) malloc(typesz);
3632  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3633 
3634  std::vector<float> res = std::vector<float>();
3635  res.assign(result, result+sz);
3636 
3637  clReleaseCommandQueue (queue);
3638  clReleaseMemObject(buffer);
3639  clReleaseMemObject(buffer2);
3640  clReleaseEvent(gpuExec);
3641  free(result);
3642 
3643  return res;
3644  }
3645 
3646  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
3647  size_t sz = v->size();
3648  size_t sz2 = v2.size();
3649  size_t typesz = sizeof(float) * sz;
3650  size_t typesz2 = sizeof(double) * sz2;
3651  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3652  if (temp_sz > 0){
3653  if (temp_sz > 1){
3654  if (temp_sz > 2){
3655  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3656  }
3657  typesz = sizeof(float) * params->buffers_size[0];
3658  typesz2 = sizeof(double) * params->buffers_size[1];
3659  }
3660  else{
3661  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3662  }
3663  }
3664  cl_int error = 0;
3665  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3666  checkError(error);
3667  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3668  checkError(error);
3669  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3670  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3671  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3672 
3673  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3674  checkError (error);
3675  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3676  checkError (error);
3677 
3678  size_t size[2] = {sz, sz2};
3679  size_t work_dimension = 2;
3680 
3681  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3682  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3683  work_dimension--;
3684  }
3685  else if(temp_sz > 0){
3686  if (params->multi_dimensional){
3687  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3688  For default multidimensional global work size, leave the global_work_size vector empty, \
3689  and set multi_dimensional to true. Setting the global work size based on the values inside \
3690  the global_work_size vector.");
3691  }
3692  if (temp_sz == 1){
3693  size[0] = params->global_work_size[0];
3694  work_dimension--;
3695  }
3696  else{
3697  size[0] = params->global_work_size[0];
3698  size[1] = params->global_work_size[1];
3699  if (temp_sz > 2){
3700  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3701  }
3702  }
3703  }
3704 
3705  cl_event gpuExec;
3706 
3707  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3708 
3709  clWaitForEvents(1, &gpuExec);
3710 
3711  float *result = (float *) malloc(typesz);
3712  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3713 
3714  v->assign(result, result+sz);
3715 
3716  clReleaseCommandQueue (queue);
3717  clReleaseMemObject(buffer);
3718  clReleaseMemObject(buffer2);
3719  clReleaseEvent(gpuExec);
3720  free(result);
3721  }
3722 
3723  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, const ROS_OpenCL_Params* params){
3724  size_t sz = v->size();
3725  size_t sz2 = v2->size();
3726  size_t typesz = sizeof(float) * sz;
3727  size_t typesz2 = sizeof(double) * sz2;
3728  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3729  if (temp_sz > 0){
3730  if (temp_sz > 1){
3731  if (temp_sz > 2){
3732  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3733  }
3734  typesz = sizeof(float) * params->buffers_size[0];
3735  typesz2 = sizeof(double) * params->buffers_size[1];
3736  }
3737  else{
3738  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3739  }
3740  }
3741  cl_int error = 0;
3742  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3743  checkError(error);
3744  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3745  checkError(error);
3746  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3747  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3748  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3749 
3750  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3751  checkError (error);
3752  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3753  checkError (error);
3754 
3755  size_t size[2] = {sz, sz2};
3756  size_t work_dimension = 2;
3757 
3758  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3759  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3760  work_dimension--;
3761  }
3762  else if(temp_sz > 0){
3763  if (params->multi_dimensional){
3764  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3765  For default multidimensional global work size, leave the global_work_size vector empty, \
3766  and set multi_dimensional to true. Setting the global work size based on the values inside \
3767  the global_work_size vector.");
3768  }
3769  if (temp_sz == 1){
3770  size[0] = params->global_work_size[0];
3771  work_dimension--;
3772  }
3773  else{
3774  size[0] = params->global_work_size[0];
3775  size[1] = params->global_work_size[1];
3776  if (temp_sz > 2){
3777  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3778  }
3779  }
3780  }
3781 
3782  cl_event gpuExec;
3783  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3784 
3785  clWaitForEvents(1, &gpuExec);
3786 
3787  float *result = (float *) malloc(typesz);
3788  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3789 
3790  v->assign(result, result+sz);
3791 
3792  double *result2 = (double *) malloc(typesz2);
3793  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3794 
3795  v2->assign(result2, result2+sz2);
3796 
3797  clReleaseCommandQueue (queue);
3798  clReleaseMemObject(buffer);
3799  clReleaseMemObject(buffer2);
3800  clReleaseEvent(gpuExec);
3801  free(result);
3802  free(result2);
3803  }
3804 
3805  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
3806  size_t sz = v.size();
3807  size_t sz2 = v2.size();
3808  size_t typesz = sizeof(double) * sz;
3809  size_t typesz2 = sizeof(char) * sz2;
3810  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3811  if (temp_sz > 0){
3812  if (temp_sz > 1){
3813  if (temp_sz > 2){
3814  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3815  }
3816  typesz = sizeof(double) * params->buffers_size[0];
3817  typesz2 = sizeof(char) * params->buffers_size[1];
3818  }
3819  else{
3820  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3821  }
3822  }
3823  cl_int error = 0;
3824  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3825  checkError(error);
3826  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3827  checkError(error);
3828  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3829  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3830  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3831 
3832  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3833  checkError (error);
3834  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3835  checkError (error);
3836 
3837  size_t size[2] = {sz, sz2};
3838  size_t work_dimension = 2;
3839 
3840  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3841  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3842  work_dimension--;
3843  }
3844  else if(temp_sz > 0){
3845  if (params->multi_dimensional){
3846  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3847  For default multidimensional global work size, leave the global_work_size vector empty, \
3848  and set multi_dimensional to true. Setting the global work size based on the values inside \
3849  the global_work_size vector.");
3850  }
3851  if (temp_sz == 1){
3852  size[0] = params->global_work_size[0];
3853  work_dimension--;
3854  }
3855  else{
3856  size[0] = params->global_work_size[0];
3857  size[1] = params->global_work_size[1];
3858  if (temp_sz > 2){
3859  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3860  }
3861  }
3862  }
3863 
3864  cl_event gpuExec;
3865 
3866  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3867 
3868  clWaitForEvents(1, &gpuExec);
3869 
3870  double *result = (double *) malloc(typesz);
3871  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3872 
3873  std::vector<double> res = std::vector<double>();
3874  res.assign(result, result+sz);
3875 
3876  clReleaseCommandQueue (queue);
3877  clReleaseMemObject(buffer);
3878  clReleaseMemObject(buffer2);
3879  clReleaseEvent(gpuExec);
3880  free(result);
3881 
3882  return res;
3883  }
3884 
3885  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<char> v2, const ROS_OpenCL_Params* params){
3886  size_t sz = v->size();
3887  size_t sz2 = v2.size();
3888  size_t typesz = sizeof(double) * sz;
3889  size_t typesz2 = sizeof(char) * sz2;
3890  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3891  if (temp_sz > 0){
3892  if (temp_sz > 1){
3893  if (temp_sz > 2){
3894  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3895  }
3896  typesz = sizeof(double) * params->buffers_size[0];
3897  typesz2 = sizeof(char) * params->buffers_size[1];
3898  }
3899  else{
3900  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3901  }
3902  }
3903  cl_int error = 0;
3904  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3905  checkError(error);
3906  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3907  checkError(error);
3908  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3909  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3910  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3911 
3912  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3913  checkError (error);
3914  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3915  checkError (error);
3916 
3917  size_t size[2] = {sz, sz2};
3918  size_t work_dimension = 2;
3919 
3920  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3921  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3922  work_dimension--;
3923  }
3924  else if(temp_sz > 0){
3925  if (params->multi_dimensional){
3926  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
3927  For default multidimensional global work size, leave the global_work_size vector empty, \
3928  and set multi_dimensional to true. Setting the global work size based on the values inside \
3929  the global_work_size vector.");
3930  }
3931  if (temp_sz == 1){
3932  size[0] = params->global_work_size[0];
3933  work_dimension--;
3934  }
3935  else{
3936  size[0] = params->global_work_size[0];
3937  size[1] = params->global_work_size[1];
3938  if (temp_sz > 2){
3939  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3940  }
3941  }
3942  }
3943 
3944  cl_event gpuExec;
3945 
3946  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3947 
3948  clWaitForEvents(1, &gpuExec);
3949 
3950  double *result = (double *) malloc(typesz);
3951  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3952 
3953  v->assign(result, result+sz);
3954 
3955  clReleaseCommandQueue (queue);
3956  clReleaseMemObject(buffer);
3957  clReleaseMemObject(buffer2);
3958  clReleaseEvent(gpuExec);
3959  free(result);
3960  }
3961 
3962  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, const ROS_OpenCL_Params* params){
3963  size_t sz = v->size();
3964  size_t sz2 = v2->size();
3965  size_t typesz = sizeof(double) * sz;
3966  size_t typesz2 = sizeof(char) * sz2;
3967  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
3968  if (temp_sz > 0){
3969  if (temp_sz > 1){
3970  if (temp_sz > 2){
3971  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3972  }
3973  typesz = sizeof(double) * params->buffers_size[0];
3974  typesz2 = sizeof(char) * params->buffers_size[1];
3975  }
3976  else{
3977  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3978  }
3979  }
3980  cl_int error = 0;
3981  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3982  checkError(error);
3983  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3984  checkError(error);
3985  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
3986  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
3987  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
3988 
3989  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3990  checkError (error);
3991  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3992  checkError (error);
3993 
3994  size_t size[2] = {sz, sz2};
3995  size_t work_dimension = 2;
3996 
3997  temp_sz = params != NULL ? params->global_work_size.size() : 0;
3998  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
3999  work_dimension--;
4000  }
4001  else if(temp_sz > 0){
4002  if (params->multi_dimensional){
4003  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4004  For default multidimensional global work size, leave the global_work_size vector empty, \
4005  and set multi_dimensional to true. Setting the global work size based on the values inside \
4006  the global_work_size vector.");
4007  }
4008  if (temp_sz == 1){
4009  size[0] = params->global_work_size[0];
4010  work_dimension--;
4011  }
4012  else{
4013  size[0] = params->global_work_size[0];
4014  size[1] = params->global_work_size[1];
4015  if (temp_sz > 2){
4016  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4017  }
4018  }
4019  }
4020 
4021  cl_event gpuExec;
4022  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4023 
4024  clWaitForEvents(1, &gpuExec);
4025 
4026  double *result = (double *) malloc(typesz);
4027  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4028 
4029  v->assign(result, result+sz);
4030 
4031  char *result2 = (char *) malloc(typesz2);
4032  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4033 
4034  v2->assign(result2, result2+sz2);
4035 
4036  clReleaseCommandQueue (queue);
4037  clReleaseMemObject(buffer);
4038  clReleaseMemObject(buffer2);
4039  clReleaseEvent(gpuExec);
4040  free(result);
4041  free(result2);
4042  }
4043 
4044  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
4045  size_t sz = v.size();
4046  size_t sz2 = v2.size();
4047  size_t typesz = sizeof(double) * sz;
4048  size_t typesz2 = sizeof(int) * sz2;
4049  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4050  if (temp_sz > 0){
4051  if (temp_sz > 1){
4052  if (temp_sz > 2){
4053  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4054  }
4055  typesz = sizeof(double) * params->buffers_size[0];
4056  typesz2 = sizeof(int) * params->buffers_size[1];
4057  }
4058  else{
4059  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4060  }
4061  }
4062  cl_int error = 0;
4063  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4064  checkError(error);
4065  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4066  checkError(error);
4067  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4068  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4069  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4070 
4071  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4072  checkError (error);
4073  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4074  checkError (error);
4075 
4076  size_t size[2] = {sz, sz2};
4077  size_t work_dimension = 2;
4078 
4079  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4080  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4081  work_dimension--;
4082  }
4083  else if(temp_sz > 0){
4084  if (params->multi_dimensional){
4085  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4086  For default multidimensional global work size, leave the global_work_size vector empty, \
4087  and set multi_dimensional to true. Setting the global work size based on the values inside \
4088  the global_work_size vector.");
4089  }
4090  if (temp_sz == 1){
4091  size[0] = params->global_work_size[0];
4092  work_dimension--;
4093  }
4094  else{
4095  size[0] = params->global_work_size[0];
4096  size[1] = params->global_work_size[1];
4097  if (temp_sz > 2){
4098  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4099  }
4100  }
4101  }
4102 
4103  cl_event gpuExec;
4104 
4105  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4106 
4107  clWaitForEvents(1, &gpuExec);
4108 
4109  double *result = (double *) malloc(typesz);
4110  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4111 
4112  std::vector<double> res = std::vector<double>();
4113  res.assign(result, result+sz);
4114 
4115  clReleaseCommandQueue (queue);
4116  clReleaseMemObject(buffer);
4117  clReleaseMemObject(buffer2);
4118  clReleaseEvent(gpuExec);
4119  free(result);
4120 
4121  return res;
4122  }
4123 
4124  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<int> v2, const ROS_OpenCL_Params* params){
4125  size_t sz = v->size();
4126  size_t sz2 = v2.size();
4127  size_t typesz = sizeof(double) * sz;
4128  size_t typesz2 = sizeof(int) * sz2;
4129  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4130  if (temp_sz > 0){
4131  if (temp_sz > 1){
4132  if (temp_sz > 2){
4133  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4134  }
4135  typesz = sizeof(double) * params->buffers_size[0];
4136  typesz2 = sizeof(int) * params->buffers_size[1];
4137  }
4138  else{
4139  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4140  }
4141  }
4142  cl_int error = 0;
4143  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4144  checkError(error);
4145  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4146  checkError(error);
4147  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4148  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4149  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4150 
4151  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4152  checkError (error);
4153  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4154  checkError (error);
4155 
4156  size_t size[2] = {sz, sz2};
4157  size_t work_dimension = 2;
4158 
4159  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4160  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4161  work_dimension--;
4162  }
4163  else if(temp_sz > 0){
4164  if (params->multi_dimensional){
4165  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4166  For default multidimensional global work size, leave the global_work_size vector empty, \
4167  and set multi_dimensional to true. Setting the global work size based on the values inside \
4168  the global_work_size vector.");
4169  }
4170  if (temp_sz == 1){
4171  size[0] = params->global_work_size[0];
4172  work_dimension--;
4173  }
4174  else{
4175  size[0] = params->global_work_size[0];
4176  size[1] = params->global_work_size[1];
4177  if (temp_sz > 2){
4178  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4179  }
4180  }
4181  }
4182 
4183  cl_event gpuExec;
4184 
4185  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4186 
4187  clWaitForEvents(1, &gpuExec);
4188 
4189  double *result = (double *) malloc(typesz);
4190  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4191 
4192  v->assign(result, result+sz);
4193 
4194  clReleaseCommandQueue (queue);
4195  clReleaseMemObject(buffer);
4196  clReleaseMemObject(buffer2);
4197  clReleaseEvent(gpuExec);
4198  free(result);
4199  }
4200 
4201  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, const ROS_OpenCL_Params* params){
4202  size_t sz = v->size();
4203  size_t sz2 = v2->size();
4204  size_t typesz = sizeof(double) * sz;
4205  size_t typesz2 = sizeof(int) * sz2;
4206  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4207  if (temp_sz > 0){
4208  if (temp_sz > 1){
4209  if (temp_sz > 2){
4210  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4211  }
4212  typesz = sizeof(double) * params->buffers_size[0];
4213  typesz2 = sizeof(int) * params->buffers_size[1];
4214  }
4215  else{
4216  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4217  }
4218  }
4219  cl_int error = 0;
4220  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4221  checkError(error);
4222  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4223  checkError(error);
4224  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4225  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4226  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4227 
4228  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4229  checkError (error);
4230  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4231  checkError (error);
4232 
4233  size_t size[2] = {sz, sz2};
4234  size_t work_dimension = 2;
4235 
4236  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4237  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4238  work_dimension--;
4239  }
4240  else if(temp_sz > 0){
4241  if (params->multi_dimensional){
4242  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4243  For default multidimensional global work size, leave the global_work_size vector empty, \
4244  and set multi_dimensional to true. Setting the global work size based on the values inside \
4245  the global_work_size vector.");
4246  }
4247  if (temp_sz == 1){
4248  size[0] = params->global_work_size[0];
4249  work_dimension--;
4250  }
4251  else{
4252  size[0] = params->global_work_size[0];
4253  size[1] = params->global_work_size[1];
4254  if (temp_sz > 2){
4255  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4256  }
4257  }
4258  }
4259 
4260  cl_event gpuExec;
4261  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4262 
4263  clWaitForEvents(1, &gpuExec);
4264 
4265  double *result = (double *) malloc(typesz);
4266  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4267 
4268  v->assign(result, result+sz);
4269 
4270  int *result2 = (int *) malloc(typesz2);
4271  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4272 
4273  v2->assign(result2, result2+sz2);
4274 
4275  clReleaseCommandQueue (queue);
4276  clReleaseMemObject(buffer);
4277  clReleaseMemObject(buffer2);
4278  clReleaseEvent(gpuExec);
4279  free(result);
4280  free(result2);
4281  }
4282 
4283  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
4284  size_t sz = v.size();
4285  size_t sz2 = v2.size();
4286  size_t typesz = sizeof(double) * sz;
4287  size_t typesz2 = sizeof(float) * sz2;
4288  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4289  if (temp_sz > 0){
4290  if (temp_sz > 1){
4291  if (temp_sz > 2){
4292  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4293  }
4294  typesz = sizeof(double) * params->buffers_size[0];
4295  typesz2 = sizeof(float) * params->buffers_size[1];
4296  }
4297  else{
4298  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4299  }
4300  }
4301  cl_int error = 0;
4302  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4303  checkError(error);
4304  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4305  checkError(error);
4306  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4307  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4308  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4309 
4310  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4311  checkError (error);
4312  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4313  checkError (error);
4314 
4315  size_t size[2] = {sz, sz2};
4316  size_t work_dimension = 2;
4317 
4318  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4319  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4320  work_dimension--;
4321  }
4322  else if(temp_sz > 0){
4323  if (params->multi_dimensional){
4324  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4325  For default multidimensional global work size, leave the global_work_size vector empty, \
4326  and set multi_dimensional to true. Setting the global work size based on the values inside \
4327  the global_work_size vector.");
4328  }
4329  if (temp_sz == 1){
4330  size[0] = params->global_work_size[0];
4331  work_dimension--;
4332  }
4333  else{
4334  size[0] = params->global_work_size[0];
4335  size[1] = params->global_work_size[1];
4336  if (temp_sz > 2){
4337  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4338  }
4339  }
4340  }
4341 
4342  cl_event gpuExec;
4343 
4344  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4345 
4346  clWaitForEvents(1, &gpuExec);
4347 
4348  double *result = (double *) malloc(typesz);
4349  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4350 
4351  std::vector<double> res = std::vector<double>();
4352  res.assign(result, result+sz);
4353 
4354  clReleaseCommandQueue (queue);
4355  clReleaseMemObject(buffer);
4356  clReleaseMemObject(buffer2);
4357  clReleaseEvent(gpuExec);
4358  free(result);
4359 
4360  return res;
4361  }
4362 
4363  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<float> v2, const ROS_OpenCL_Params* params){
4364  size_t sz = v->size();
4365  size_t sz2 = v2.size();
4366  size_t typesz = sizeof(double) * sz;
4367  size_t typesz2 = sizeof(float) * sz2;
4368  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4369  if (temp_sz > 0){
4370  if (temp_sz > 1){
4371  if (temp_sz > 2){
4372  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4373  }
4374  typesz = sizeof(double) * params->buffers_size[0];
4375  typesz2 = sizeof(float) * params->buffers_size[1];
4376  }
4377  else{
4378  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4379  }
4380  }
4381  cl_int error = 0;
4382  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4383  checkError(error);
4384  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4385  checkError(error);
4386  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4387  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4388  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4389 
4390  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4391  checkError (error);
4392  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4393  checkError (error);
4394 
4395  size_t size[2] = {sz, sz2};
4396  size_t work_dimension = 2;
4397 
4398  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4399  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4400  work_dimension--;
4401  }
4402  else if(temp_sz > 0){
4403  if (params->multi_dimensional){
4404  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4405  For default multidimensional global work size, leave the global_work_size vector empty, \
4406  and set multi_dimensional to true. Setting the global work size based on the values inside \
4407  the global_work_size vector.");
4408  }
4409  if (temp_sz == 1){
4410  size[0] = params->global_work_size[0];
4411  work_dimension--;
4412  }
4413  else{
4414  size[0] = params->global_work_size[0];
4415  size[1] = params->global_work_size[1];
4416  if (temp_sz > 2){
4417  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4418  }
4419  }
4420  }
4421 
4422  cl_event gpuExec;
4423 
4424  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4425 
4426  clWaitForEvents(1, &gpuExec);
4427 
4428  double *result = (double *) malloc(typesz);
4429  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4430 
4431  v->assign(result, result+sz);
4432 
4433  clReleaseCommandQueue (queue);
4434  clReleaseMemObject(buffer);
4435  clReleaseMemObject(buffer2);
4436  clReleaseEvent(gpuExec);
4437  free(result);
4438  }
4439 
4440  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, const ROS_OpenCL_Params* params){
4441  size_t sz = v->size();
4442  size_t sz2 = v2->size();
4443  size_t typesz = sizeof(double) * sz;
4444  size_t typesz2 = sizeof(float) * sz2;
4445  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4446  if (temp_sz > 0){
4447  if (temp_sz > 1){
4448  if (temp_sz > 2){
4449  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4450  }
4451  typesz = sizeof(double) * params->buffers_size[0];
4452  typesz2 = sizeof(float) * params->buffers_size[1];
4453  }
4454  else{
4455  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4456  }
4457  }
4458  cl_int error = 0;
4459  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4460  checkError(error);
4461  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4462  checkError(error);
4463  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4464  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4465  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4466 
4467  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4468  checkError (error);
4469  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4470  checkError (error);
4471 
4472  size_t size[2] = {sz, sz2};
4473  size_t work_dimension = 2;
4474 
4475  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4476  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4477  work_dimension--;
4478  }
4479  else if(temp_sz > 0){
4480  if (params->multi_dimensional){
4481  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4482  For default multidimensional global work size, leave the global_work_size vector empty, \
4483  and set multi_dimensional to true. Setting the global work size based on the values inside \
4484  the global_work_size vector.");
4485  }
4486  if (temp_sz == 1){
4487  size[0] = params->global_work_size[0];
4488  work_dimension--;
4489  }
4490  else{
4491  size[0] = params->global_work_size[0];
4492  size[1] = params->global_work_size[1];
4493  if (temp_sz > 2){
4494  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4495  }
4496  }
4497  }
4498 
4499  cl_event gpuExec;
4500  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4501 
4502  clWaitForEvents(1, &gpuExec);
4503 
4504  double *result = (double *) malloc(typesz);
4505  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4506 
4507  v->assign(result, result+sz);
4508 
4509  float *result2 = (float *) malloc(typesz2);
4510  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4511 
4512  v2->assign(result2, result2+sz2);
4513 
4514  clReleaseCommandQueue (queue);
4515  clReleaseMemObject(buffer);
4516  clReleaseMemObject(buffer2);
4517  clReleaseEvent(gpuExec);
4518  free(result);
4519  free(result2);
4520  }
4521 
4522  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
4523  size_t sz = v.size();
4524  size_t sz2 = v2.size();
4525  size_t typesz = sizeof(double) * sz;
4526  size_t typesz2 = sizeof(double) * sz2;
4527  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4528  if (temp_sz > 0){
4529  if (temp_sz > 1){
4530  if (temp_sz > 2){
4531  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4532  }
4533  typesz = sizeof(double) * params->buffers_size[0];
4534  typesz2 = sizeof(double) * params->buffers_size[1];
4535  }
4536  else{
4537  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4538  }
4539  }
4540  cl_int error = 0;
4541  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4542  checkError(error);
4543  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4544  checkError(error);
4545  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4546  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4547  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4548 
4549  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4550  checkError (error);
4551  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4552  checkError (error);
4553 
4554  size_t size[2] = {sz, sz2};
4555  size_t work_dimension = 2;
4556 
4557  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4558  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4559  work_dimension--;
4560  }
4561  else if(temp_sz > 0){
4562  if (params->multi_dimensional){
4563  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4564  For default multidimensional global work size, leave the global_work_size vector empty, \
4565  and set multi_dimensional to true. Setting the global work size based on the values inside \
4566  the global_work_size vector.");
4567  }
4568  if (temp_sz == 1){
4569  size[0] = params->global_work_size[0];
4570  work_dimension--;
4571  }
4572  else{
4573  size[0] = params->global_work_size[0];
4574  size[1] = params->global_work_size[1];
4575  if (temp_sz > 2){
4576  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4577  }
4578  }
4579  }
4580 
4581  cl_event gpuExec;
4582 
4583  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4584 
4585  clWaitForEvents(1, &gpuExec);
4586 
4587  double *result = (double *) malloc(typesz);
4588  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4589 
4590  std::vector<double> res = std::vector<double>();
4591  res.assign(result, result+sz);
4592 
4593  clReleaseCommandQueue (queue);
4594  clReleaseMemObject(buffer);
4595  clReleaseMemObject(buffer2);
4596  clReleaseEvent(gpuExec);
4597  free(result);
4598 
4599  return res;
4600  }
4601 
4602  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<double> v2, const ROS_OpenCL_Params* params){
4603  size_t sz = v->size();
4604  size_t sz2 = v2.size();
4605  size_t typesz = sizeof(double) * sz;
4606  size_t typesz2 = sizeof(double) * sz2;
4607  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4608  if (temp_sz > 0){
4609  if (temp_sz > 1){
4610  if (temp_sz > 2){
4611  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4612  }
4613  typesz = sizeof(double) * params->buffers_size[0];
4614  typesz2 = sizeof(double) * params->buffers_size[1];
4615  }
4616  else{
4617  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4618  }
4619  }
4620  cl_int error = 0;
4621  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4622  checkError(error);
4623  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4624  checkError(error);
4625  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4626  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4627  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4628 
4629  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4630  checkError (error);
4631  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4632  checkError (error);
4633 
4634  size_t size[2] = {sz, sz2};
4635  size_t work_dimension = 2;
4636 
4637  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4638  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4639  work_dimension--;
4640  }
4641  else if(temp_sz > 0){
4642  if (params->multi_dimensional){
4643  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4644  For default multidimensional global work size, leave the global_work_size vector empty, \
4645  and set multi_dimensional to true. Setting the global work size based on the values inside \
4646  the global_work_size vector.");
4647  }
4648  if (temp_sz == 1){
4649  size[0] = params->global_work_size[0];
4650  work_dimension--;
4651  }
4652  else{
4653  size[0] = params->global_work_size[0];
4654  size[1] = params->global_work_size[1];
4655  if (temp_sz > 2){
4656  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4657  }
4658  }
4659  }
4660 
4661  cl_event gpuExec;
4662 
4663  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4664 
4665  clWaitForEvents(1, &gpuExec);
4666 
4667  double *result = (double *) malloc(typesz);
4668  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4669 
4670  v->assign(result, result+sz);
4671 
4672  clReleaseCommandQueue (queue);
4673  clReleaseMemObject(buffer);
4674  clReleaseMemObject(buffer2);
4675  clReleaseEvent(gpuExec);
4676  free(result);
4677  }
4678 
4679  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, const ROS_OpenCL_Params* params){
4680  size_t sz = v->size();
4681  size_t sz2 = v2->size();
4682  size_t typesz = sizeof(double) * sz;
4683  size_t typesz2 = sizeof(double) * sz2;
4684  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4685  if (temp_sz > 0){
4686  if (temp_sz > 1){
4687  if (temp_sz > 2){
4688  ROS_WARN("buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4689  }
4690  typesz = sizeof(double) * params->buffers_size[0];
4691  typesz2 = sizeof(double) * params->buffers_size[1];
4692  }
4693  else{
4694  ROS_WARN("buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4695  }
4696  }
4697  cl_int error = 0;
4698  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4699  checkError(error);
4700  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4701  checkError(error);
4702  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4703  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4704  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4705 
4706  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4707  checkError (error);
4708  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4709  checkError (error);
4710 
4711  size_t size[2] = {sz, sz2};
4712  size_t work_dimension = 2;
4713 
4714  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4715  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4716  work_dimension--;
4717  }
4718  else if(temp_sz > 0){
4719  if (params->multi_dimensional){
4720  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4721  For default multidimensional global work size, leave the global_work_size vector empty, \
4722  and set multi_dimensional to true. Setting the global work size based on the values inside \
4723  the global_work_size vector.");
4724  }
4725  if (temp_sz == 1){
4726  size[0] = params->global_work_size[0];
4727  work_dimension--;
4728  }
4729  else{
4730  size[0] = params->global_work_size[0];
4731  size[1] = params->global_work_size[1];
4732  if (temp_sz > 2){
4733  ROS_WARN("global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4734  }
4735  }
4736  }
4737 
4738  cl_event gpuExec;
4739  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4740 
4741  clWaitForEvents(1, &gpuExec);
4742 
4743  double *result = (double *) malloc(typesz);
4744  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4745 
4746  v->assign(result, result+sz);
4747 
4748  if (typesz2 != typesz or sz != sz2){
4749  double *result2;
4750  result2 = (double *) malloc(typesz2);
4751  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4752 
4753  v2->assign(result2, result2+sz2);
4754  free(result2);
4755  }
4756  else{
4757  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
4758 
4759  v2->assign(result, result+sz2);
4760  }
4761 
4762  clReleaseCommandQueue (queue);
4763  clReleaseMemObject(buffer);
4764  clReleaseMemObject(buffer2);
4765  clReleaseEvent(gpuExec);
4766  free(result);
4767  }
4768 
4769 
4770 
4771  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
4772  size_t sz = v.size();
4773  size_t sz2 = v2.size();
4774  size_t sz3 = v3.size();
4775  size_t typesz = sizeof(char) * sz;
4776  size_t typesz2 = sizeof(char) * sz2;
4777  size_t typesz3 = sizeof(char) * sz3;
4778  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4779  if (temp_sz > 0){
4780  if (temp_sz > 2){
4781  if (temp_sz > 3){
4782  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4783  }
4784  typesz = sizeof(char) * params->buffers_size[0];
4785  typesz2 = sizeof(char) * params->buffers_size[1];
4786  typesz3 = sizeof(char) * params->buffers_size[2];
4787  }
4788  else{
4789  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
4790  }
4791  }
4792  cl_int error = 0;
4793  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4794  checkError(error);
4795  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4796  checkError(error);
4797  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
4798  checkError(error);
4799  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4800  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4801  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
4802  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4803 
4804  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4805  checkError (error);
4806  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4807  checkError (error);
4808  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
4809  checkError (error);
4810 
4811  size_t size[3] = {sz, sz2, sz3};
4812  size_t work_dimension = 3;
4813 
4814  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4815  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4816  work_dimension = 1;
4817  }
4818  else if(temp_sz > 0){
4819  if (params->multi_dimensional){
4820  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4821  For default multidimensional global work size, leave the global_work_size vector empty, \
4822  and set multi_dimensional to true. Setting the global work size based on the values inside \
4823  the global_work_size vector.");
4824  }
4825  if (temp_sz == 1){
4826  size[0] = params->global_work_size[0];
4827  work_dimension = 1;
4828  }
4829  else if (temp_sz == 2){
4830  size[0] = params->global_work_size[0];
4831  size[1] = params->global_work_size[1];
4832  work_dimension = 2;
4833  }
4834  else{
4835  size[0] = params->global_work_size[0];
4836  size[1] = params->global_work_size[1];
4837  size[2] = params->global_work_size[2];
4838  if (temp_sz > 3){
4839  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
4840  }
4841  }
4842  }
4843 
4844  cl_event gpuExec;
4845 
4846  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4847 
4848  clWaitForEvents(1, &gpuExec);
4849 
4850  char *result = (char *) malloc(typesz);
4851  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4852 
4853  std::vector<char> res = std::vector<char>();
4854  res.assign(result, result+sz);
4855 
4856  clReleaseCommandQueue (queue);
4857  clReleaseMemObject(buffer);
4858  clReleaseMemObject(buffer2);
4859  clReleaseMemObject(buffer3);
4860  clReleaseEvent(gpuExec);
4861  free(result);
4862 
4863  return res;
4864  }
4865 
4866  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
4867  size_t sz = v->size();
4868  size_t sz2 = v2.size();
4869  size_t sz3 = v3.size();
4870  size_t typesz = sizeof(char) * sz;
4871  size_t typesz2 = sizeof(char) * sz2;
4872  size_t typesz3 = sizeof(char) * sz3;
4873  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4874  if (temp_sz > 0){
4875  if (temp_sz > 2){
4876  if (temp_sz > 3){
4877  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4878  }
4879  typesz = sizeof(char) * params->buffers_size[0];
4880  typesz2 = sizeof(char) * params->buffers_size[1];
4881  typesz3 = sizeof(char) * params->buffers_size[2];
4882  }
4883  else{
4884  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
4885  }
4886  }
4887  cl_int error = 0;
4888  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4889  checkError(error);
4890  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4891  checkError(error);
4892  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
4893  checkError(error);
4894  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4895  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4896  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
4897  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4898 
4899  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4900  checkError (error);
4901  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4902  checkError (error);
4903  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
4904  checkError (error);
4905 
4906  size_t size[3] = {sz, sz2, sz3};
4907  size_t work_dimension = 3;
4908 
4909  temp_sz = params != NULL ? params->global_work_size.size() : 0;
4910  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
4911  work_dimension = 1;
4912  }
4913  else if(temp_sz > 0){
4914  if (params->multi_dimensional){
4915  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
4916  For default multidimensional global work size, leave the global_work_size vector empty, \
4917  and set multi_dimensional to true. Setting the global work size based on the values inside \
4918  the global_work_size vector.");
4919  }
4920  if (temp_sz == 1){
4921  size[0] = params->global_work_size[0];
4922  work_dimension = 1;
4923  }
4924  else if (temp_sz == 2){
4925  size[0] = params->global_work_size[0];
4926  size[1] = params->global_work_size[1];
4927  work_dimension = 2;
4928  }
4929  else{
4930  size[0] = params->global_work_size[0];
4931  size[1] = params->global_work_size[1];
4932  size[2] = params->global_work_size[2];
4933  if (temp_sz > 3){
4934  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
4935  }
4936  }
4937  }
4938 
4939  cl_event gpuExec;
4940 
4941  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4942 
4943  clWaitForEvents(1, &gpuExec);
4944 
4945  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4946 
4947  clWaitForEvents(1, &gpuExec);
4948 
4949  char *result = (char *) malloc(typesz);
4950  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4951 
4952  v->assign(result, result+sz);
4953 
4954  clReleaseCommandQueue (queue);
4955  clReleaseMemObject(buffer);
4956  clReleaseMemObject(buffer2);
4957  clReleaseMemObject(buffer3);
4958  clReleaseEvent(gpuExec);
4959  free(result);
4960  }
4961 
4962  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
4963  size_t sz = v->size();
4964  size_t sz2 = v2->size();
4965  size_t sz3 = v3.size();
4966  size_t typesz = sizeof(char) * sz;
4967  size_t typesz2 = sizeof(char) * sz2;
4968  size_t typesz3 = sizeof(char) * sz3;
4969  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
4970  if (temp_sz > 0){
4971  if (temp_sz > 2){
4972  if (temp_sz > 3){
4973  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4974  }
4975  typesz = sizeof(char) * params->buffers_size[0];
4976  typesz2 = sizeof(char) * params->buffers_size[1];
4977  typesz3 = sizeof(char) * params->buffers_size[2];
4978  }
4979  else{
4980  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
4981  }
4982  }
4983  cl_int error = 0;
4984  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4985  checkError(error);
4986  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4987  checkError(error);
4988  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
4989  checkError(error);
4990  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
4991  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
4992  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
4993  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
4994 
4995  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4996  checkError (error);
4997  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4998  checkError (error);
4999  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5000  checkError (error);
5001 
5002  size_t size[3] = {sz, sz2, sz3};
5003  size_t work_dimension = 3;
5004 
5005  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5006  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5007  work_dimension = 1;
5008  }
5009  else if(temp_sz > 0){
5010  if (params->multi_dimensional){
5011  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5012  For default multidimensional global work size, leave the global_work_size vector empty, \
5013  and set multi_dimensional to true. Setting the global work size based on the values inside \
5014  the global_work_size vector.");
5015  }
5016  if (temp_sz == 1){
5017  size[0] = params->global_work_size[0];
5018  work_dimension = 1;
5019  }
5020  else if (temp_sz == 2){
5021  size[0] = params->global_work_size[0];
5022  size[1] = params->global_work_size[1];
5023  work_dimension = 2;
5024  }
5025  else{
5026  size[0] = params->global_work_size[0];
5027  size[1] = params->global_work_size[1];
5028  size[2] = params->global_work_size[2];
5029  if (temp_sz > 3){
5030  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5031  }
5032  }
5033  }
5034 
5035  cl_event gpuExec;
5036 
5037  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5038 
5039  clWaitForEvents(1, &gpuExec);
5040 
5041  char *result = (char *) malloc(typesz);
5042  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5043 
5044  v->assign(result, result+sz);
5045 
5046  if (typesz2 != typesz or sz != sz2){
5047  char *result2;
5048  result2 = (char *) malloc(typesz2);
5049  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5050 
5051  v2->assign(result2, result2+sz2);
5052  free(result2);
5053  }
5054  else{
5055  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5056 
5057  v2->assign(result, result+sz2);
5058  }
5059 
5060  clReleaseCommandQueue (queue);
5061  clReleaseMemObject(buffer);
5062  clReleaseMemObject(buffer2);
5063  clReleaseMemObject(buffer3);
5064  clReleaseEvent(gpuExec);
5065  free(result);
5066  }
5067 
5068  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
5069  size_t sz = v->size();
5070  size_t sz2 = v2->size();
5071  size_t sz3 = v3->size();
5072  size_t typesz = sizeof(char) * sz;
5073  size_t typesz2 = sizeof(char) * sz2;
5074  size_t typesz3 = sizeof(char) * sz3;
5075  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5076  if (temp_sz > 0){
5077  if (temp_sz > 2){
5078  if (temp_sz > 3){
5079  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5080  }
5081  typesz = sizeof(char) * params->buffers_size[0];
5082  typesz2 = sizeof(char) * params->buffers_size[1];
5083  typesz3 = sizeof(char) * params->buffers_size[2];
5084  }
5085  else{
5086  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5087  }
5088  }
5089  cl_int error = 0;
5090  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5091  checkError(error);
5092  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5093  checkError(error);
5094  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
5095  checkError(error);
5096  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5097  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5098  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5099  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5100 
5101  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5102  checkError (error);
5103  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5104  checkError (error);
5105  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
5106  checkError (error);
5107 
5108  size_t size[3] = {sz, sz2, sz3};
5109  size_t work_dimension = 3;
5110 
5111  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5112  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5113  work_dimension = 1;
5114  }
5115  else if(temp_sz > 0){
5116  if (params->multi_dimensional){
5117  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5118  For default multidimensional global work size, leave the global_work_size vector empty, \
5119  and set multi_dimensional to true. Setting the global work size based on the values inside \
5120  the global_work_size vector.");
5121  }
5122  if (temp_sz == 1){
5123  size[0] = params->global_work_size[0];
5124  work_dimension = 1;
5125  }
5126  else if (temp_sz == 2){
5127  size[0] = params->global_work_size[0];
5128  size[1] = params->global_work_size[1];
5129  work_dimension = 2;
5130  }
5131  else{
5132  size[0] = params->global_work_size[0];
5133  size[1] = params->global_work_size[1];
5134  size[2] = params->global_work_size[2];
5135  if (temp_sz > 3){
5136  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5137  }
5138  }
5139  }
5140 
5141  cl_event gpuExec;
5142 
5143  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5144 
5145  clWaitForEvents(1, &gpuExec);
5146 
5147  char *result = (char *) malloc(typesz);
5148  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5149 
5150  v->assign(result, result+sz);
5151 
5152  if (typesz2 != typesz or sz != sz2){
5153  char *result2;
5154  result2 = (char *) malloc(typesz2);
5155  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5156 
5157  v2->assign(result2, result2+sz2);
5158  free(result2);
5159  }
5160  else{
5161  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5162 
5163  v2->assign(result, result+sz2);
5164  }
5165 
5166  if (typesz3 != typesz or sz != sz3){
5167  char *result3;
5168  result3 = (char *) malloc(typesz3);
5169  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
5170 
5171  v3->assign(result3, result3+sz3);
5172  free(result3);
5173  }
5174  else{
5175  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5176 
5177  v3->assign(result, result+sz3);
5178  }
5179 
5180  clReleaseCommandQueue (queue);
5181  clReleaseMemObject(buffer);
5182  clReleaseMemObject(buffer2);
5183  clReleaseMemObject(buffer3);
5184  clReleaseEvent(gpuExec);
5185  free(result);
5186  }
5187 
5188 
5189  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
5190  size_t sz = v.size();
5191  size_t sz2 = v2.size();
5192  size_t sz3 = v3.size();
5193  size_t typesz = sizeof(char) * sz;
5194  size_t typesz2 = sizeof(char) * sz2;
5195  size_t typesz3 = sizeof(int) * sz3;
5196  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5197  if (temp_sz > 0){
5198  if (temp_sz > 2){
5199  if (temp_sz > 3){
5200  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5201  }
5202  typesz = sizeof(char) * params->buffers_size[0];
5203  typesz2 = sizeof(char) * params->buffers_size[1];
5204  typesz3 = sizeof(int) * params->buffers_size[2];
5205  }
5206  else{
5207  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5208  }
5209  }
5210  cl_int error = 0;
5211  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5212  checkError(error);
5213  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5214  checkError(error);
5215  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5216  checkError(error);
5217  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5218  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5219  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5220  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5221 
5222  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
5223  checkError (error);
5224  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5225  checkError (error);
5226  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5227  checkError (error);
5228 
5229  size_t size[3] = {sz, sz2, sz3};
5230  size_t work_dimension = 3;
5231 
5232  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5233  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5234  work_dimension = 1;
5235  }
5236  else if(temp_sz > 0){
5237  if (params->multi_dimensional){
5238  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5239  For default multidimensional global work size, leave the global_work_size vector empty, \
5240  and set multi_dimensional to true. Setting the global work size based on the values inside \
5241  the global_work_size vector.");
5242  }
5243  if (temp_sz == 1){
5244  size[0] = params->global_work_size[0];
5245  work_dimension = 1;
5246  }
5247  else if (temp_sz == 2){
5248  size[0] = params->global_work_size[0];
5249  size[1] = params->global_work_size[1];
5250  work_dimension = 2;
5251  }
5252  else{
5253  size[0] = params->global_work_size[0];
5254  size[1] = params->global_work_size[1];
5255  size[2] = params->global_work_size[2];
5256  if (temp_sz > 3){
5257  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5258  }
5259  }
5260  }
5261 
5262  cl_event gpuExec;
5263 
5264  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5265 
5266  clWaitForEvents(1, &gpuExec);
5267 
5268  char *result = (char *) malloc(typesz);
5269  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5270 
5271  std::vector<char> res = std::vector<char>();
5272  res.assign(result, result+sz);
5273 
5274  clReleaseCommandQueue (queue);
5275  clReleaseMemObject(buffer);
5276  clReleaseMemObject(buffer2);
5277  clReleaseMemObject(buffer3);
5278  clReleaseEvent(gpuExec);
5279  free(result);
5280 
5281  return res;
5282  }
5283 
5284  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
5285  size_t sz = v->size();
5286  size_t sz2 = v2.size();
5287  size_t sz3 = v3.size();
5288  size_t typesz = sizeof(char) * sz;
5289  size_t typesz2 = sizeof(char) * sz2;
5290  size_t typesz3 = sizeof(int) * sz3;
5291  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5292  if (temp_sz > 0){
5293  if (temp_sz > 2){
5294  if (temp_sz > 3){
5295  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5296  }
5297  typesz = sizeof(char) * params->buffers_size[0];
5298  typesz2 = sizeof(char) * params->buffers_size[1];
5299  typesz3 = sizeof(int) * params->buffers_size[2];
5300  }
5301  else{
5302  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5303  }
5304  }
5305  cl_int error = 0;
5306  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5307  checkError(error);
5308  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5309  checkError(error);
5310  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5311  checkError(error);
5312  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5313  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5314  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5315  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5316 
5317  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5318  checkError (error);
5319  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5320  checkError (error);
5321  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5322  checkError (error);
5323 
5324  size_t size[3] = {sz, sz2, sz3};
5325  size_t work_dimension = 3;
5326 
5327  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5328  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5329  work_dimension = 1;
5330  }
5331  else if(temp_sz > 0){
5332  if (params->multi_dimensional){
5333  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5334  For default multidimensional global work size, leave the global_work_size vector empty, \
5335  and set multi_dimensional to true. Setting the global work size based on the values inside \
5336  the global_work_size vector.");
5337  }
5338  if (temp_sz == 1){
5339  size[0] = params->global_work_size[0];
5340  work_dimension = 1;
5341  }
5342  else if (temp_sz == 2){
5343  size[0] = params->global_work_size[0];
5344  size[1] = params->global_work_size[1];
5345  work_dimension = 2;
5346  }
5347  else{
5348  size[0] = params->global_work_size[0];
5349  size[1] = params->global_work_size[1];
5350  size[2] = params->global_work_size[2];
5351  if (temp_sz > 3){
5352  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5353  }
5354  }
5355  }
5356 
5357  cl_event gpuExec;
5358 
5359  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5360 
5361  clWaitForEvents(1, &gpuExec);
5362 
5363  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5364 
5365  clWaitForEvents(1, &gpuExec);
5366 
5367  char *result = (char *) malloc(typesz);
5368  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5369 
5370  v->assign(result, result+sz);
5371 
5372  clReleaseCommandQueue (queue);
5373  clReleaseMemObject(buffer);
5374  clReleaseMemObject(buffer2);
5375  clReleaseMemObject(buffer3);
5376  clReleaseEvent(gpuExec);
5377  free(result);
5378  }
5379 
5380  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
5381  size_t sz = v->size();
5382  size_t sz2 = v2->size();
5383  size_t sz3 = v3.size();
5384  size_t typesz = sizeof(char) * sz;
5385  size_t typesz2 = sizeof(char) * sz2;
5386  size_t typesz3 = sizeof(int) * sz3;
5387  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5388  if (temp_sz > 0){
5389  if (temp_sz > 2){
5390  if (temp_sz > 3){
5391  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5392  }
5393  typesz = sizeof(char) * params->buffers_size[0];
5394  typesz2 = sizeof(char) * params->buffers_size[1];
5395  typesz3 = sizeof(int) * params->buffers_size[2];
5396  }
5397  else{
5398  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5399  }
5400  }
5401  cl_int error = 0;
5402  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5403  checkError(error);
5404  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5405  checkError(error);
5406  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5407  checkError(error);
5408  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5409  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5410  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5411  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5412 
5413  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5414  checkError (error);
5415  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5416  checkError (error);
5417  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5418  checkError (error);
5419 
5420  size_t size[3] = {sz, sz2, sz3};
5421  size_t work_dimension = 3;
5422 
5423  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5424  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5425  work_dimension = 1;
5426  }
5427  else if(temp_sz > 0){
5428  if (params->multi_dimensional){
5429  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5430  For default multidimensional global work size, leave the global_work_size vector empty, \
5431  and set multi_dimensional to true. Setting the global work size based on the values inside \
5432  the global_work_size vector.");
5433  }
5434  if (temp_sz == 1){
5435  size[0] = params->global_work_size[0];
5436  work_dimension = 1;
5437  }
5438  else if (temp_sz == 2){
5439  size[0] = params->global_work_size[0];
5440  size[1] = params->global_work_size[1];
5441  work_dimension = 2;
5442  }
5443  else{
5444  size[0] = params->global_work_size[0];
5445  size[1] = params->global_work_size[1];
5446  size[2] = params->global_work_size[2];
5447  if (temp_sz > 3){
5448  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5449  }
5450  }
5451  }
5452 
5453  cl_event gpuExec;
5454 
5455  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5456 
5457  clWaitForEvents(1, &gpuExec);
5458 
5459  char *result = (char *) malloc(typesz);
5460  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5461 
5462  v->assign(result, result+sz);
5463 
5464  if (typesz2 != typesz or sz != sz2){
5465  char *result2;
5466  result2 = (char *) malloc(typesz2);
5467  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5468 
5469  v2->assign(result2, result2+sz2);
5470  free(result2);
5471  }
5472  else{
5473  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5474 
5475  v2->assign(result, result+sz2);
5476  }
5477 
5478  clReleaseCommandQueue (queue);
5479  clReleaseMemObject(buffer);
5480  clReleaseMemObject(buffer2);
5481  clReleaseMemObject(buffer3);
5482  clReleaseEvent(gpuExec);
5483  free(result);
5484  }
5485 
5486  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
5487  size_t sz = v->size();
5488  size_t sz2 = v2->size();
5489  size_t sz3 = v3->size();
5490  size_t typesz = sizeof(char) * sz;
5491  size_t typesz2 = sizeof(char) * sz2;
5492  size_t typesz3 = sizeof(int) * sz3;
5493  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5494  if (temp_sz > 0){
5495  if (temp_sz > 2){
5496  if (temp_sz > 3){
5497  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5498  }
5499  typesz = sizeof(char) * params->buffers_size[0];
5500  typesz2 = sizeof(char) * params->buffers_size[1];
5501  typesz3 = sizeof(int) * params->buffers_size[2];
5502  }
5503  else{
5504  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5505  }
5506  }
5507  cl_int error = 0;
5508  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5509  checkError(error);
5510  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5511  checkError(error);
5512  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
5513  checkError(error);
5514  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5515  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5516  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5517  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5518 
5519  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5520  checkError (error);
5521  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5522  checkError (error);
5523  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
5524  checkError (error);
5525 
5526  size_t size[3] = {sz, sz2, sz3};
5527  size_t work_dimension = 3;
5528 
5529  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5530  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5531  work_dimension = 1;
5532  }
5533  else if(temp_sz > 0){
5534  if (params->multi_dimensional){
5535  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5536  For default multidimensional global work size, leave the global_work_size vector empty, \
5537  and set multi_dimensional to true. Setting the global work size based on the values inside \
5538  the global_work_size vector.");
5539  }
5540  if (temp_sz == 1){
5541  size[0] = params->global_work_size[0];
5542  work_dimension = 1;
5543  }
5544  else if (temp_sz == 2){
5545  size[0] = params->global_work_size[0];
5546  size[1] = params->global_work_size[1];
5547  work_dimension = 2;
5548  }
5549  else{
5550  size[0] = params->global_work_size[0];
5551  size[1] = params->global_work_size[1];
5552  size[2] = params->global_work_size[2];
5553  if (temp_sz > 3){
5554  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5555  }
5556  }
5557  }
5558 
5559  cl_event gpuExec;
5560 
5561  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5562 
5563  clWaitForEvents(1, &gpuExec);
5564 
5565  char *result = (char *) malloc(typesz);
5566  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5567 
5568  v->assign(result, result+sz);
5569 
5570  if (typesz2 != typesz or sz != sz2){
5571  char *result2;
5572  result2 = (char *) malloc(typesz2);
5573  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5574 
5575  v2->assign(result2, result2+sz2);
5576  free(result2);
5577  }
5578  else{
5579  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5580 
5581  v2->assign(result, result+sz2);
5582  }
5583 
5584  if (typesz3 != typesz or sz != sz3){
5585  int *result3;
5586  result3 = (int *) malloc(typesz3);
5587  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
5588 
5589  v3->assign(result3, result3+sz3);
5590  free(result3);
5591  }
5592  else{
5593  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5594 
5595  v3->assign(result, result+sz3);
5596  }
5597 
5598  clReleaseCommandQueue (queue);
5599  clReleaseMemObject(buffer);
5600  clReleaseMemObject(buffer2);
5601  clReleaseMemObject(buffer3);
5602  clReleaseEvent(gpuExec);
5603  free(result);
5604  }
5605 
5606 
5607  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
5608  size_t sz = v.size();
5609  size_t sz2 = v2.size();
5610  size_t sz3 = v3.size();
5611  size_t typesz = sizeof(char) * sz;
5612  size_t typesz2 = sizeof(char) * sz2;
5613  size_t typesz3 = sizeof(float) * sz3;
5614  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5615  if (temp_sz > 0){
5616  if (temp_sz > 2){
5617  if (temp_sz > 3){
5618  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5619  }
5620  typesz = sizeof(char) * params->buffers_size[0];
5621  typesz2 = sizeof(char) * params->buffers_size[1];
5622  typesz3 = sizeof(float) * params->buffers_size[2];
5623  }
5624  else{
5625  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5626  }
5627  }
5628  cl_int error = 0;
5629  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5630  checkError(error);
5631  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5632  checkError(error);
5633  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5634  checkError(error);
5635  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5636  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5637  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5638  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5639 
5640  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
5641  checkError (error);
5642  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5643  checkError (error);
5644  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5645  checkError (error);
5646 
5647  size_t size[3] = {sz, sz2, sz3};
5648  size_t work_dimension = 3;
5649 
5650  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5651  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5652  work_dimension = 1;
5653  }
5654  else if(temp_sz > 0){
5655  if (params->multi_dimensional){
5656  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5657  For default multidimensional global work size, leave the global_work_size vector empty, \
5658  and set multi_dimensional to true. Setting the global work size based on the values inside \
5659  the global_work_size vector.");
5660  }
5661  if (temp_sz == 1){
5662  size[0] = params->global_work_size[0];
5663  work_dimension = 1;
5664  }
5665  else if (temp_sz == 2){
5666  size[0] = params->global_work_size[0];
5667  size[1] = params->global_work_size[1];
5668  work_dimension = 2;
5669  }
5670  else{
5671  size[0] = params->global_work_size[0];
5672  size[1] = params->global_work_size[1];
5673  size[2] = params->global_work_size[2];
5674  if (temp_sz > 3){
5675  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5676  }
5677  }
5678  }
5679 
5680  cl_event gpuExec;
5681 
5682  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5683 
5684  clWaitForEvents(1, &gpuExec);
5685 
5686  char *result = (char *) malloc(typesz);
5687  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5688 
5689  std::vector<char> res = std::vector<char>();
5690  res.assign(result, result+sz);
5691 
5692  clReleaseCommandQueue (queue);
5693  clReleaseMemObject(buffer);
5694  clReleaseMemObject(buffer2);
5695  clReleaseMemObject(buffer3);
5696  clReleaseEvent(gpuExec);
5697  free(result);
5698 
5699  return res;
5700  }
5701 
5702  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
5703  size_t sz = v->size();
5704  size_t sz2 = v2.size();
5705  size_t sz3 = v3.size();
5706  size_t typesz = sizeof(char) * sz;
5707  size_t typesz2 = sizeof(char) * sz2;
5708  size_t typesz3 = sizeof(float) * sz3;
5709  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5710  if (temp_sz > 0){
5711  if (temp_sz > 2){
5712  if (temp_sz > 3){
5713  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5714  }
5715  typesz = sizeof(char) * params->buffers_size[0];
5716  typesz2 = sizeof(char) * params->buffers_size[1];
5717  typesz3 = sizeof(float) * params->buffers_size[2];
5718  }
5719  else{
5720  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5721  }
5722  }
5723  cl_int error = 0;
5724  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5725  checkError(error);
5726  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5727  checkError(error);
5728  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5729  checkError(error);
5730  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5731  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5732  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5733  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5734 
5735  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5736  checkError (error);
5737  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5738  checkError (error);
5739  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5740  checkError (error);
5741 
5742  size_t size[3] = {sz, sz2, sz3};
5743  size_t work_dimension = 3;
5744 
5745  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5746  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5747  work_dimension = 1;
5748  }
5749  else if(temp_sz > 0){
5750  if (params->multi_dimensional){
5751  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5752  For default multidimensional global work size, leave the global_work_size vector empty, \
5753  and set multi_dimensional to true. Setting the global work size based on the values inside \
5754  the global_work_size vector.");
5755  }
5756  if (temp_sz == 1){
5757  size[0] = params->global_work_size[0];
5758  work_dimension = 1;
5759  }
5760  else if (temp_sz == 2){
5761  size[0] = params->global_work_size[0];
5762  size[1] = params->global_work_size[1];
5763  work_dimension = 2;
5764  }
5765  else{
5766  size[0] = params->global_work_size[0];
5767  size[1] = params->global_work_size[1];
5768  size[2] = params->global_work_size[2];
5769  if (temp_sz > 3){
5770  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5771  }
5772  }
5773  }
5774 
5775  cl_event gpuExec;
5776 
5777  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5778 
5779  clWaitForEvents(1, &gpuExec);
5780 
5781  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5782 
5783  clWaitForEvents(1, &gpuExec);
5784 
5785  char *result = (char *) malloc(typesz);
5786  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5787 
5788  v->assign(result, result+sz);
5789 
5790  clReleaseCommandQueue (queue);
5791  clReleaseMemObject(buffer);
5792  clReleaseMemObject(buffer2);
5793  clReleaseMemObject(buffer3);
5794  clReleaseEvent(gpuExec);
5795  free(result);
5796  }
5797 
5798  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
5799  size_t sz = v->size();
5800  size_t sz2 = v2->size();
5801  size_t sz3 = v3.size();
5802  size_t typesz = sizeof(char) * sz;
5803  size_t typesz2 = sizeof(char) * sz2;
5804  size_t typesz3 = sizeof(float) * sz3;
5805  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5806  if (temp_sz > 0){
5807  if (temp_sz > 2){
5808  if (temp_sz > 3){
5809  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5810  }
5811  typesz = sizeof(char) * params->buffers_size[0];
5812  typesz2 = sizeof(char) * params->buffers_size[1];
5813  typesz3 = sizeof(float) * params->buffers_size[2];
5814  }
5815  else{
5816  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5817  }
5818  }
5819  cl_int error = 0;
5820  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5821  checkError(error);
5822  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5823  checkError(error);
5824  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
5825  checkError(error);
5826  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5827  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5828  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5829  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5830 
5831  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5832  checkError (error);
5833  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5834  checkError (error);
5835  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
5836  checkError (error);
5837 
5838  size_t size[3] = {sz, sz2, sz3};
5839  size_t work_dimension = 3;
5840 
5841  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5842  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5843  work_dimension = 1;
5844  }
5845  else if(temp_sz > 0){
5846  if (params->multi_dimensional){
5847  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5848  For default multidimensional global work size, leave the global_work_size vector empty, \
5849  and set multi_dimensional to true. Setting the global work size based on the values inside \
5850  the global_work_size vector.");
5851  }
5852  if (temp_sz == 1){
5853  size[0] = params->global_work_size[0];
5854  work_dimension = 1;
5855  }
5856  else if (temp_sz == 2){
5857  size[0] = params->global_work_size[0];
5858  size[1] = params->global_work_size[1];
5859  work_dimension = 2;
5860  }
5861  else{
5862  size[0] = params->global_work_size[0];
5863  size[1] = params->global_work_size[1];
5864  size[2] = params->global_work_size[2];
5865  if (temp_sz > 3){
5866  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5867  }
5868  }
5869  }
5870 
5871  cl_event gpuExec;
5872 
5873  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5874 
5875  clWaitForEvents(1, &gpuExec);
5876 
5877  char *result = (char *) malloc(typesz);
5878  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5879 
5880  v->assign(result, result+sz);
5881 
5882  if (typesz2 != typesz or sz != sz2){
5883  char *result2;
5884  result2 = (char *) malloc(typesz2);
5885  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5886 
5887  v2->assign(result2, result2+sz2);
5888  free(result2);
5889  }
5890  else{
5891  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5892 
5893  v2->assign(result, result+sz2);
5894  }
5895 
5896  clReleaseCommandQueue (queue);
5897  clReleaseMemObject(buffer);
5898  clReleaseMemObject(buffer2);
5899  clReleaseMemObject(buffer3);
5900  clReleaseEvent(gpuExec);
5901  free(result);
5902  }
5903 
5904  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
5905  size_t sz = v->size();
5906  size_t sz2 = v2->size();
5907  size_t sz3 = v3->size();
5908  size_t typesz = sizeof(char) * sz;
5909  size_t typesz2 = sizeof(char) * sz2;
5910  size_t typesz3 = sizeof(float) * sz3;
5911  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
5912  if (temp_sz > 0){
5913  if (temp_sz > 2){
5914  if (temp_sz > 3){
5915  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5916  }
5917  typesz = sizeof(char) * params->buffers_size[0];
5918  typesz2 = sizeof(char) * params->buffers_size[1];
5919  typesz3 = sizeof(float) * params->buffers_size[2];
5920  }
5921  else{
5922  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5923  }
5924  }
5925  cl_int error = 0;
5926  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5927  checkError(error);
5928  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5929  checkError(error);
5930  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
5931  checkError(error);
5932  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
5933  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
5934  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
5935  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
5936 
5937  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5938  checkError (error);
5939  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5940  checkError (error);
5941  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
5942  checkError (error);
5943 
5944  size_t size[3] = {sz, sz2, sz3};
5945  size_t work_dimension = 3;
5946 
5947  temp_sz = params != NULL ? params->global_work_size.size() : 0;
5948  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
5949  work_dimension = 1;
5950  }
5951  else if(temp_sz > 0){
5952  if (params->multi_dimensional){
5953  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
5954  For default multidimensional global work size, leave the global_work_size vector empty, \
5955  and set multi_dimensional to true. Setting the global work size based on the values inside \
5956  the global_work_size vector.");
5957  }
5958  if (temp_sz == 1){
5959  size[0] = params->global_work_size[0];
5960  work_dimension = 1;
5961  }
5962  else if (temp_sz == 2){
5963  size[0] = params->global_work_size[0];
5964  size[1] = params->global_work_size[1];
5965  work_dimension = 2;
5966  }
5967  else{
5968  size[0] = params->global_work_size[0];
5969  size[1] = params->global_work_size[1];
5970  size[2] = params->global_work_size[2];
5971  if (temp_sz > 3){
5972  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5973  }
5974  }
5975  }
5976 
5977  cl_event gpuExec;
5978 
5979  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5980 
5981  clWaitForEvents(1, &gpuExec);
5982 
5983  char *result = (char *) malloc(typesz);
5984  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5985 
5986  v->assign(result, result+sz);
5987 
5988  if (typesz2 != typesz or sz != sz2){
5989  char *result2;
5990  result2 = (char *) malloc(typesz2);
5991  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5992 
5993  v2->assign(result2, result2+sz2);
5994  free(result2);
5995  }
5996  else{
5997  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5998 
5999  v2->assign(result, result+sz2);
6000  }
6001 
6002  if (typesz3 != typesz or sz != sz3){
6003  float *result3;
6004  result3 = (float *) malloc(typesz3);
6005  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6006 
6007  v3->assign(result3, result3+sz3);
6008  free(result3);
6009  }
6010  else{
6011  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6012 
6013  v3->assign(result, result+sz3);
6014  }
6015 
6016  clReleaseCommandQueue (queue);
6017  clReleaseMemObject(buffer);
6018  clReleaseMemObject(buffer2);
6019  clReleaseMemObject(buffer3);
6020  clReleaseEvent(gpuExec);
6021  free(result);
6022  }
6023 
6024 
6025  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
6026  size_t sz = v.size();
6027  size_t sz2 = v2.size();
6028  size_t sz3 = v3.size();
6029  size_t typesz = sizeof(char) * sz;
6030  size_t typesz2 = sizeof(char) * sz2;
6031  size_t typesz3 = sizeof(double) * sz3;
6032  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6033  if (temp_sz > 0){
6034  if (temp_sz > 2){
6035  if (temp_sz > 3){
6036  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6037  }
6038  typesz = sizeof(char) * params->buffers_size[0];
6039  typesz2 = sizeof(char) * params->buffers_size[1];
6040  typesz3 = sizeof(double) * params->buffers_size[2];
6041  }
6042  else{
6043  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6044  }
6045  }
6046  cl_int error = 0;
6047  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6048  checkError(error);
6049  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6050  checkError(error);
6051  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6052  checkError(error);
6053  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6054  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6055  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6056  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6057 
6058  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6059  checkError (error);
6060  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6061  checkError (error);
6062  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6063  checkError (error);
6064 
6065  size_t size[3] = {sz, sz2, sz3};
6066  size_t work_dimension = 3;
6067 
6068  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6069  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6070  work_dimension = 1;
6071  }
6072  else if(temp_sz > 0){
6073  if (params->multi_dimensional){
6074  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6075  For default multidimensional global work size, leave the global_work_size vector empty, \
6076  and set multi_dimensional to true. Setting the global work size based on the values inside \
6077  the global_work_size vector.");
6078  }
6079  if (temp_sz == 1){
6080  size[0] = params->global_work_size[0];
6081  work_dimension = 1;
6082  }
6083  else if (temp_sz == 2){
6084  size[0] = params->global_work_size[0];
6085  size[1] = params->global_work_size[1];
6086  work_dimension = 2;
6087  }
6088  else{
6089  size[0] = params->global_work_size[0];
6090  size[1] = params->global_work_size[1];
6091  size[2] = params->global_work_size[2];
6092  if (temp_sz > 3){
6093  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6094  }
6095  }
6096  }
6097 
6098  cl_event gpuExec;
6099 
6100  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6101 
6102  clWaitForEvents(1, &gpuExec);
6103 
6104  char *result = (char *) malloc(typesz);
6105  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6106 
6107  std::vector<char> res = std::vector<char>();
6108  res.assign(result, result+sz);
6109 
6110  clReleaseCommandQueue (queue);
6111  clReleaseMemObject(buffer);
6112  clReleaseMemObject(buffer2);
6113  clReleaseMemObject(buffer3);
6114  clReleaseEvent(gpuExec);
6115  free(result);
6116 
6117  return res;
6118  }
6119 
6120  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
6121  size_t sz = v->size();
6122  size_t sz2 = v2.size();
6123  size_t sz3 = v3.size();
6124  size_t typesz = sizeof(char) * sz;
6125  size_t typesz2 = sizeof(char) * sz2;
6126  size_t typesz3 = sizeof(double) * sz3;
6127  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6128  if (temp_sz > 0){
6129  if (temp_sz > 2){
6130  if (temp_sz > 3){
6131  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6132  }
6133  typesz = sizeof(char) * params->buffers_size[0];
6134  typesz2 = sizeof(char) * params->buffers_size[1];
6135  typesz3 = sizeof(double) * params->buffers_size[2];
6136  }
6137  else{
6138  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6139  }
6140  }
6141  cl_int error = 0;
6142  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6143  checkError(error);
6144  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6145  checkError(error);
6146  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6147  checkError(error);
6148  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6149  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6150  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6151  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6152 
6153  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6154  checkError (error);
6155  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6156  checkError (error);
6157  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6158  checkError (error);
6159 
6160  size_t size[3] = {sz, sz2, sz3};
6161  size_t work_dimension = 3;
6162 
6163  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6164  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6165  work_dimension = 1;
6166  }
6167  else if(temp_sz > 0){
6168  if (params->multi_dimensional){
6169  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6170  For default multidimensional global work size, leave the global_work_size vector empty, \
6171  and set multi_dimensional to true. Setting the global work size based on the values inside \
6172  the global_work_size vector.");
6173  }
6174  if (temp_sz == 1){
6175  size[0] = params->global_work_size[0];
6176  work_dimension = 1;
6177  }
6178  else if (temp_sz == 2){
6179  size[0] = params->global_work_size[0];
6180  size[1] = params->global_work_size[1];
6181  work_dimension = 2;
6182  }
6183  else{
6184  size[0] = params->global_work_size[0];
6185  size[1] = params->global_work_size[1];
6186  size[2] = params->global_work_size[2];
6187  if (temp_sz > 3){
6188  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6189  }
6190  }
6191  }
6192 
6193  cl_event gpuExec;
6194 
6195  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6196 
6197  clWaitForEvents(1, &gpuExec);
6198 
6199  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6200 
6201  clWaitForEvents(1, &gpuExec);
6202 
6203  char *result = (char *) malloc(typesz);
6204  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6205 
6206  v->assign(result, result+sz);
6207 
6208  clReleaseCommandQueue (queue);
6209  clReleaseMemObject(buffer);
6210  clReleaseMemObject(buffer2);
6211  clReleaseMemObject(buffer3);
6212  clReleaseEvent(gpuExec);
6213  free(result);
6214  }
6215 
6216  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
6217  size_t sz = v->size();
6218  size_t sz2 = v2->size();
6219  size_t sz3 = v3.size();
6220  size_t typesz = sizeof(char) * sz;
6221  size_t typesz2 = sizeof(char) * sz2;
6222  size_t typesz3 = sizeof(double) * sz3;
6223  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6224  if (temp_sz > 0){
6225  if (temp_sz > 2){
6226  if (temp_sz > 3){
6227  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6228  }
6229  typesz = sizeof(char) * params->buffers_size[0];
6230  typesz2 = sizeof(char) * params->buffers_size[1];
6231  typesz3 = sizeof(double) * params->buffers_size[2];
6232  }
6233  else{
6234  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6235  }
6236  }
6237  cl_int error = 0;
6238  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6239  checkError(error);
6240  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6241  checkError(error);
6242  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6243  checkError(error);
6244  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6245  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6246  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6247  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6248 
6249  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6250  checkError (error);
6251  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6252  checkError (error);
6253  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6254  checkError (error);
6255 
6256  size_t size[3] = {sz, sz2, sz3};
6257  size_t work_dimension = 3;
6258 
6259  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6260  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6261  work_dimension = 1;
6262  }
6263  else if(temp_sz > 0){
6264  if (params->multi_dimensional){
6265  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6266  For default multidimensional global work size, leave the global_work_size vector empty, \
6267  and set multi_dimensional to true. Setting the global work size based on the values inside \
6268  the global_work_size vector.");
6269  }
6270  if (temp_sz == 1){
6271  size[0] = params->global_work_size[0];
6272  work_dimension = 1;
6273  }
6274  else if (temp_sz == 2){
6275  size[0] = params->global_work_size[0];
6276  size[1] = params->global_work_size[1];
6277  work_dimension = 2;
6278  }
6279  else{
6280  size[0] = params->global_work_size[0];
6281  size[1] = params->global_work_size[1];
6282  size[2] = params->global_work_size[2];
6283  if (temp_sz > 3){
6284  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6285  }
6286  }
6287  }
6288 
6289  cl_event gpuExec;
6290 
6291  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6292 
6293  clWaitForEvents(1, &gpuExec);
6294 
6295  char *result = (char *) malloc(typesz);
6296  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6297 
6298  v->assign(result, result+sz);
6299 
6300  if (typesz2 != typesz or sz != sz2){
6301  char *result2;
6302  result2 = (char *) malloc(typesz2);
6303  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6304 
6305  v2->assign(result2, result2+sz2);
6306  free(result2);
6307  }
6308  else{
6309  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6310 
6311  v2->assign(result, result+sz2);
6312  }
6313 
6314  clReleaseCommandQueue (queue);
6315  clReleaseMemObject(buffer);
6316  clReleaseMemObject(buffer2);
6317  clReleaseMemObject(buffer3);
6318  clReleaseEvent(gpuExec);
6319  free(result);
6320  }
6321 
6322  void ROS_OpenCL::process(std::vector<char>* v, std::vector<char>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
6323  size_t sz = v->size();
6324  size_t sz2 = v2->size();
6325  size_t sz3 = v3->size();
6326  size_t typesz = sizeof(char) * sz;
6327  size_t typesz2 = sizeof(char) * sz2;
6328  size_t typesz3 = sizeof(double) * sz3;
6329  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6330  if (temp_sz > 0){
6331  if (temp_sz > 2){
6332  if (temp_sz > 3){
6333  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6334  }
6335  typesz = sizeof(char) * params->buffers_size[0];
6336  typesz2 = sizeof(char) * params->buffers_size[1];
6337  typesz3 = sizeof(double) * params->buffers_size[2];
6338  }
6339  else{
6340  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6341  }
6342  }
6343  cl_int error = 0;
6344  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6345  checkError(error);
6346  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6347  checkError(error);
6348  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
6349  checkError(error);
6350  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6351  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6352  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6353  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6354 
6355  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6356  checkError (error);
6357  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6358  checkError (error);
6359  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
6360  checkError (error);
6361 
6362  size_t size[3] = {sz, sz2, sz3};
6363  size_t work_dimension = 3;
6364 
6365  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6366  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6367  work_dimension = 1;
6368  }
6369  else if(temp_sz > 0){
6370  if (params->multi_dimensional){
6371  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6372  For default multidimensional global work size, leave the global_work_size vector empty, \
6373  and set multi_dimensional to true. Setting the global work size based on the values inside \
6374  the global_work_size vector.");
6375  }
6376  if (temp_sz == 1){
6377  size[0] = params->global_work_size[0];
6378  work_dimension = 1;
6379  }
6380  else if (temp_sz == 2){
6381  size[0] = params->global_work_size[0];
6382  size[1] = params->global_work_size[1];
6383  work_dimension = 2;
6384  }
6385  else{
6386  size[0] = params->global_work_size[0];
6387  size[1] = params->global_work_size[1];
6388  size[2] = params->global_work_size[2];
6389  if (temp_sz > 3){
6390  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6391  }
6392  }
6393  }
6394 
6395  cl_event gpuExec;
6396 
6397  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6398 
6399  clWaitForEvents(1, &gpuExec);
6400 
6401  char *result = (char *) malloc(typesz);
6402  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6403 
6404  v->assign(result, result+sz);
6405 
6406  if (typesz2 != typesz or sz != sz2){
6407  char *result2;
6408  result2 = (char *) malloc(typesz2);
6409  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6410 
6411  v2->assign(result2, result2+sz2);
6412  free(result2);
6413  }
6414  else{
6415  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6416 
6417  v2->assign(result, result+sz2);
6418  }
6419 
6420  if (typesz3 != typesz or sz != sz3){
6421  double *result3;
6422  result3 = (double *) malloc(typesz3);
6423  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6424 
6425  v3->assign(result3, result3+sz3);
6426  free(result3);
6427  }
6428  else{
6429  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6430 
6431  v3->assign(result, result+sz3);
6432  }
6433 
6434  clReleaseCommandQueue (queue);
6435  clReleaseMemObject(buffer);
6436  clReleaseMemObject(buffer2);
6437  clReleaseMemObject(buffer3);
6438  clReleaseEvent(gpuExec);
6439  free(result);
6440  }
6441 
6442 
6443  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
6444  size_t sz = v.size();
6445  size_t sz2 = v2.size();
6446  size_t sz3 = v3.size();
6447  size_t typesz = sizeof(char) * sz;
6448  size_t typesz2 = sizeof(int) * sz2;
6449  size_t typesz3 = sizeof(char) * sz3;
6450  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6451  if (temp_sz > 0){
6452  if (temp_sz > 2){
6453  if (temp_sz > 3){
6454  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6455  }
6456  typesz = sizeof(char) * params->buffers_size[0];
6457  typesz2 = sizeof(int) * params->buffers_size[1];
6458  typesz3 = sizeof(char) * params->buffers_size[2];
6459  }
6460  else{
6461  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6462  }
6463  }
6464  cl_int error = 0;
6465  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6466  checkError(error);
6467  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6468  checkError(error);
6469  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6470  checkError(error);
6471  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6472  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6473  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6474  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6475 
6476  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6477  checkError (error);
6478  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6479  checkError (error);
6480  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6481  checkError (error);
6482 
6483  size_t size[3] = {sz, sz2, sz3};
6484  size_t work_dimension = 3;
6485 
6486  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6487  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6488  work_dimension = 1;
6489  }
6490  else if(temp_sz > 0){
6491  if (params->multi_dimensional){
6492  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6493  For default multidimensional global work size, leave the global_work_size vector empty, \
6494  and set multi_dimensional to true. Setting the global work size based on the values inside \
6495  the global_work_size vector.");
6496  }
6497  if (temp_sz == 1){
6498  size[0] = params->global_work_size[0];
6499  work_dimension = 1;
6500  }
6501  else if (temp_sz == 2){
6502  size[0] = params->global_work_size[0];
6503  size[1] = params->global_work_size[1];
6504  work_dimension = 2;
6505  }
6506  else{
6507  size[0] = params->global_work_size[0];
6508  size[1] = params->global_work_size[1];
6509  size[2] = params->global_work_size[2];
6510  if (temp_sz > 3){
6511  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6512  }
6513  }
6514  }
6515 
6516  cl_event gpuExec;
6517 
6518  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6519 
6520  clWaitForEvents(1, &gpuExec);
6521 
6522  char *result = (char *) malloc(typesz);
6523  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6524 
6525  std::vector<char> res = std::vector<char>();
6526  res.assign(result, result+sz);
6527 
6528  clReleaseCommandQueue (queue);
6529  clReleaseMemObject(buffer);
6530  clReleaseMemObject(buffer2);
6531  clReleaseMemObject(buffer3);
6532  clReleaseEvent(gpuExec);
6533  free(result);
6534 
6535  return res;
6536  }
6537 
6538  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
6539  size_t sz = v->size();
6540  size_t sz2 = v2.size();
6541  size_t sz3 = v3.size();
6542  size_t typesz = sizeof(char) * sz;
6543  size_t typesz2 = sizeof(int) * sz2;
6544  size_t typesz3 = sizeof(char) * sz3;
6545  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6546  if (temp_sz > 0){
6547  if (temp_sz > 2){
6548  if (temp_sz > 3){
6549  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6550  }
6551  typesz = sizeof(char) * params->buffers_size[0];
6552  typesz2 = sizeof(int) * params->buffers_size[1];
6553  typesz3 = sizeof(char) * params->buffers_size[2];
6554  }
6555  else{
6556  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6557  }
6558  }
6559  cl_int error = 0;
6560  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6561  checkError(error);
6562  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6563  checkError(error);
6564  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6565  checkError(error);
6566  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6567  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6568  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6569  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6570 
6571  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6572  checkError (error);
6573  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6574  checkError (error);
6575  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6576  checkError (error);
6577 
6578  size_t size[3] = {sz, sz2, sz3};
6579  size_t work_dimension = 3;
6580 
6581  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6582  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6583  work_dimension = 1;
6584  }
6585  else if(temp_sz > 0){
6586  if (params->multi_dimensional){
6587  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6588  For default multidimensional global work size, leave the global_work_size vector empty, \
6589  and set multi_dimensional to true. Setting the global work size based on the values inside \
6590  the global_work_size vector.");
6591  }
6592  if (temp_sz == 1){
6593  size[0] = params->global_work_size[0];
6594  work_dimension = 1;
6595  }
6596  else if (temp_sz == 2){
6597  size[0] = params->global_work_size[0];
6598  size[1] = params->global_work_size[1];
6599  work_dimension = 2;
6600  }
6601  else{
6602  size[0] = params->global_work_size[0];
6603  size[1] = params->global_work_size[1];
6604  size[2] = params->global_work_size[2];
6605  if (temp_sz > 3){
6606  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6607  }
6608  }
6609  }
6610 
6611  cl_event gpuExec;
6612 
6613  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6614 
6615  clWaitForEvents(1, &gpuExec);
6616 
6617  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6618 
6619  clWaitForEvents(1, &gpuExec);
6620 
6621  char *result = (char *) malloc(typesz);
6622  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6623 
6624  v->assign(result, result+sz);
6625 
6626  clReleaseCommandQueue (queue);
6627  clReleaseMemObject(buffer);
6628  clReleaseMemObject(buffer2);
6629  clReleaseMemObject(buffer3);
6630  clReleaseEvent(gpuExec);
6631  free(result);
6632  }
6633 
6634  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
6635  size_t sz = v->size();
6636  size_t sz2 = v2->size();
6637  size_t sz3 = v3.size();
6638  size_t typesz = sizeof(char) * sz;
6639  size_t typesz2 = sizeof(int) * sz2;
6640  size_t typesz3 = sizeof(char) * sz3;
6641  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6642  if (temp_sz > 0){
6643  if (temp_sz > 2){
6644  if (temp_sz > 3){
6645  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6646  }
6647  typesz = sizeof(char) * params->buffers_size[0];
6648  typesz2 = sizeof(int) * params->buffers_size[1];
6649  typesz3 = sizeof(char) * params->buffers_size[2];
6650  }
6651  else{
6652  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6653  }
6654  }
6655  cl_int error = 0;
6656  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6657  checkError(error);
6658  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6659  checkError(error);
6660  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6661  checkError(error);
6662  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6663  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6664  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6665  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6666 
6667  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6668  checkError (error);
6669  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6670  checkError (error);
6671  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6672  checkError (error);
6673 
6674  size_t size[3] = {sz, sz2, sz3};
6675  size_t work_dimension = 3;
6676 
6677  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6678  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6679  work_dimension = 1;
6680  }
6681  else if(temp_sz > 0){
6682  if (params->multi_dimensional){
6683  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6684  For default multidimensional global work size, leave the global_work_size vector empty, \
6685  and set multi_dimensional to true. Setting the global work size based on the values inside \
6686  the global_work_size vector.");
6687  }
6688  if (temp_sz == 1){
6689  size[0] = params->global_work_size[0];
6690  work_dimension = 1;
6691  }
6692  else if (temp_sz == 2){
6693  size[0] = params->global_work_size[0];
6694  size[1] = params->global_work_size[1];
6695  work_dimension = 2;
6696  }
6697  else{
6698  size[0] = params->global_work_size[0];
6699  size[1] = params->global_work_size[1];
6700  size[2] = params->global_work_size[2];
6701  if (temp_sz > 3){
6702  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6703  }
6704  }
6705  }
6706 
6707  cl_event gpuExec;
6708 
6709  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6710 
6711  clWaitForEvents(1, &gpuExec);
6712 
6713  char *result = (char *) malloc(typesz);
6714  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6715 
6716  v->assign(result, result+sz);
6717 
6718  if (typesz2 != typesz or sz != sz2){
6719  int *result2;
6720  result2 = (int *) malloc(typesz2);
6721  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6722 
6723  v2->assign(result2, result2+sz2);
6724  free(result2);
6725  }
6726  else{
6727  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6728 
6729  v2->assign(result, result+sz2);
6730  }
6731 
6732  clReleaseCommandQueue (queue);
6733  clReleaseMemObject(buffer);
6734  clReleaseMemObject(buffer2);
6735  clReleaseMemObject(buffer3);
6736  clReleaseEvent(gpuExec);
6737  free(result);
6738  }
6739 
6740  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
6741  size_t sz = v->size();
6742  size_t sz2 = v2->size();
6743  size_t sz3 = v3->size();
6744  size_t typesz = sizeof(char) * sz;
6745  size_t typesz2 = sizeof(int) * sz2;
6746  size_t typesz3 = sizeof(char) * sz3;
6747  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6748  if (temp_sz > 0){
6749  if (temp_sz > 2){
6750  if (temp_sz > 3){
6751  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6752  }
6753  typesz = sizeof(char) * params->buffers_size[0];
6754  typesz2 = sizeof(int) * params->buffers_size[1];
6755  typesz3 = sizeof(char) * params->buffers_size[2];
6756  }
6757  else{
6758  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6759  }
6760  }
6761  cl_int error = 0;
6762  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6763  checkError(error);
6764  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6765  checkError(error);
6766  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
6767  checkError(error);
6768  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6769  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6770  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6771  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6772 
6773  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6774  checkError (error);
6775  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6776  checkError (error);
6777  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
6778  checkError (error);
6779 
6780  size_t size[3] = {sz, sz2, sz3};
6781  size_t work_dimension = 3;
6782 
6783  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6784  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6785  work_dimension = 1;
6786  }
6787  else if(temp_sz > 0){
6788  if (params->multi_dimensional){
6789  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6790  For default multidimensional global work size, leave the global_work_size vector empty, \
6791  and set multi_dimensional to true. Setting the global work size based on the values inside \
6792  the global_work_size vector.");
6793  }
6794  if (temp_sz == 1){
6795  size[0] = params->global_work_size[0];
6796  work_dimension = 1;
6797  }
6798  else if (temp_sz == 2){
6799  size[0] = params->global_work_size[0];
6800  size[1] = params->global_work_size[1];
6801  work_dimension = 2;
6802  }
6803  else{
6804  size[0] = params->global_work_size[0];
6805  size[1] = params->global_work_size[1];
6806  size[2] = params->global_work_size[2];
6807  if (temp_sz > 3){
6808  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6809  }
6810  }
6811  }
6812 
6813  cl_event gpuExec;
6814 
6815  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6816 
6817  clWaitForEvents(1, &gpuExec);
6818 
6819  char *result = (char *) malloc(typesz);
6820  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6821 
6822  v->assign(result, result+sz);
6823 
6824  if (typesz2 != typesz or sz != sz2){
6825  int *result2;
6826  result2 = (int *) malloc(typesz2);
6827  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6828 
6829  v2->assign(result2, result2+sz2);
6830  free(result2);
6831  }
6832  else{
6833  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6834 
6835  v2->assign(result, result+sz2);
6836  }
6837 
6838  if (typesz3 != typesz or sz != sz3){
6839  char *result3;
6840  result3 = (char *) malloc(typesz3);
6841  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6842 
6843  v3->assign(result3, result3+sz3);
6844  free(result3);
6845  }
6846  else{
6847  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6848 
6849  v3->assign(result, result+sz3);
6850  }
6851 
6852  clReleaseCommandQueue (queue);
6853  clReleaseMemObject(buffer);
6854  clReleaseMemObject(buffer2);
6855  clReleaseMemObject(buffer3);
6856  clReleaseEvent(gpuExec);
6857  free(result);
6858  }
6859 
6860 
6861  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
6862  size_t sz = v.size();
6863  size_t sz2 = v2.size();
6864  size_t sz3 = v3.size();
6865  size_t typesz = sizeof(char) * sz;
6866  size_t typesz2 = sizeof(int) * sz2;
6867  size_t typesz3 = sizeof(int) * sz3;
6868  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6869  if (temp_sz > 0){
6870  if (temp_sz > 2){
6871  if (temp_sz > 3){
6872  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6873  }
6874  typesz = sizeof(char) * params->buffers_size[0];
6875  typesz2 = sizeof(int) * params->buffers_size[1];
6876  typesz3 = sizeof(int) * params->buffers_size[2];
6877  }
6878  else{
6879  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6880  }
6881  }
6882  cl_int error = 0;
6883  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6884  checkError(error);
6885  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6886  checkError(error);
6887  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6888  checkError(error);
6889  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6890  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6891  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6892  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6893 
6894  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6895  checkError (error);
6896  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6897  checkError (error);
6898  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6899  checkError (error);
6900 
6901  size_t size[3] = {sz, sz2, sz3};
6902  size_t work_dimension = 3;
6903 
6904  temp_sz = params != NULL ? params->global_work_size.size() : 0;
6905  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
6906  work_dimension = 1;
6907  }
6908  else if(temp_sz > 0){
6909  if (params->multi_dimensional){
6910  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
6911  For default multidimensional global work size, leave the global_work_size vector empty, \
6912  and set multi_dimensional to true. Setting the global work size based on the values inside \
6913  the global_work_size vector.");
6914  }
6915  if (temp_sz == 1){
6916  size[0] = params->global_work_size[0];
6917  work_dimension = 1;
6918  }
6919  else if (temp_sz == 2){
6920  size[0] = params->global_work_size[0];
6921  size[1] = params->global_work_size[1];
6922  work_dimension = 2;
6923  }
6924  else{
6925  size[0] = params->global_work_size[0];
6926  size[1] = params->global_work_size[1];
6927  size[2] = params->global_work_size[2];
6928  if (temp_sz > 3){
6929  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6930  }
6931  }
6932  }
6933 
6934  cl_event gpuExec;
6935 
6936  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6937 
6938  clWaitForEvents(1, &gpuExec);
6939 
6940  char *result = (char *) malloc(typesz);
6941  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6942 
6943  std::vector<char> res = std::vector<char>();
6944  res.assign(result, result+sz);
6945 
6946  clReleaseCommandQueue (queue);
6947  clReleaseMemObject(buffer);
6948  clReleaseMemObject(buffer2);
6949  clReleaseMemObject(buffer3);
6950  clReleaseEvent(gpuExec);
6951  free(result);
6952 
6953  return res;
6954  }
6955 
6956  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
6957  size_t sz = v->size();
6958  size_t sz2 = v2.size();
6959  size_t sz3 = v3.size();
6960  size_t typesz = sizeof(char) * sz;
6961  size_t typesz2 = sizeof(int) * sz2;
6962  size_t typesz3 = sizeof(int) * sz3;
6963  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
6964  if (temp_sz > 0){
6965  if (temp_sz > 2){
6966  if (temp_sz > 3){
6967  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6968  }
6969  typesz = sizeof(char) * params->buffers_size[0];
6970  typesz2 = sizeof(int) * params->buffers_size[1];
6971  typesz3 = sizeof(int) * params->buffers_size[2];
6972  }
6973  else{
6974  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6975  }
6976  }
6977  cl_int error = 0;
6978  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6979  checkError(error);
6980  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6981  checkError(error);
6982  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6983  checkError(error);
6984  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
6985  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
6986  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
6987  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
6988 
6989  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6990  checkError (error);
6991  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6992  checkError (error);
6993  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6994  checkError (error);
6995 
6996  size_t size[3] = {sz, sz2, sz3};
6997  size_t work_dimension = 3;
6998 
6999  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7000  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7001  work_dimension = 1;
7002  }
7003  else if(temp_sz > 0){
7004  if (params->multi_dimensional){
7005  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7006  For default multidimensional global work size, leave the global_work_size vector empty, \
7007  and set multi_dimensional to true. Setting the global work size based on the values inside \
7008  the global_work_size vector.");
7009  }
7010  if (temp_sz == 1){
7011  size[0] = params->global_work_size[0];
7012  work_dimension = 1;
7013  }
7014  else if (temp_sz == 2){
7015  size[0] = params->global_work_size[0];
7016  size[1] = params->global_work_size[1];
7017  work_dimension = 2;
7018  }
7019  else{
7020  size[0] = params->global_work_size[0];
7021  size[1] = params->global_work_size[1];
7022  size[2] = params->global_work_size[2];
7023  if (temp_sz > 3){
7024  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7025  }
7026  }
7027  }
7028 
7029  cl_event gpuExec;
7030 
7031  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7032 
7033  clWaitForEvents(1, &gpuExec);
7034 
7035  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7036 
7037  clWaitForEvents(1, &gpuExec);
7038 
7039  char *result = (char *) malloc(typesz);
7040  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7041 
7042  v->assign(result, result+sz);
7043 
7044  clReleaseCommandQueue (queue);
7045  clReleaseMemObject(buffer);
7046  clReleaseMemObject(buffer2);
7047  clReleaseMemObject(buffer3);
7048  clReleaseEvent(gpuExec);
7049  free(result);
7050  }
7051 
7052  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
7053  size_t sz = v->size();
7054  size_t sz2 = v2->size();
7055  size_t sz3 = v3.size();
7056  size_t typesz = sizeof(char) * sz;
7057  size_t typesz2 = sizeof(int) * sz2;
7058  size_t typesz3 = sizeof(int) * sz3;
7059  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7060  if (temp_sz > 0){
7061  if (temp_sz > 2){
7062  if (temp_sz > 3){
7063  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7064  }
7065  typesz = sizeof(char) * params->buffers_size[0];
7066  typesz2 = sizeof(int) * params->buffers_size[1];
7067  typesz3 = sizeof(int) * params->buffers_size[2];
7068  }
7069  else{
7070  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7071  }
7072  }
7073  cl_int error = 0;
7074  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7075  checkError(error);
7076  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7077  checkError(error);
7078  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7079  checkError(error);
7080  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7081  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7082  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7083  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7084 
7085  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7086  checkError (error);
7087  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7088  checkError (error);
7089  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7090  checkError (error);
7091 
7092  size_t size[3] = {sz, sz2, sz3};
7093  size_t work_dimension = 3;
7094 
7095  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7096  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7097  work_dimension = 1;
7098  }
7099  else if(temp_sz > 0){
7100  if (params->multi_dimensional){
7101  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7102  For default multidimensional global work size, leave the global_work_size vector empty, \
7103  and set multi_dimensional to true. Setting the global work size based on the values inside \
7104  the global_work_size vector.");
7105  }
7106  if (temp_sz == 1){
7107  size[0] = params->global_work_size[0];
7108  work_dimension = 1;
7109  }
7110  else if (temp_sz == 2){
7111  size[0] = params->global_work_size[0];
7112  size[1] = params->global_work_size[1];
7113  work_dimension = 2;
7114  }
7115  else{
7116  size[0] = params->global_work_size[0];
7117  size[1] = params->global_work_size[1];
7118  size[2] = params->global_work_size[2];
7119  if (temp_sz > 3){
7120  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7121  }
7122  }
7123  }
7124 
7125  cl_event gpuExec;
7126 
7127  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7128 
7129  clWaitForEvents(1, &gpuExec);
7130 
7131  char *result = (char *) malloc(typesz);
7132  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7133 
7134  v->assign(result, result+sz);
7135 
7136  if (typesz2 != typesz or sz != sz2){
7137  int *result2;
7138  result2 = (int *) malloc(typesz2);
7139  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7140 
7141  v2->assign(result2, result2+sz2);
7142  free(result2);
7143  }
7144  else{
7145  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7146 
7147  v2->assign(result, result+sz2);
7148  }
7149 
7150  clReleaseCommandQueue (queue);
7151  clReleaseMemObject(buffer);
7152  clReleaseMemObject(buffer2);
7153  clReleaseMemObject(buffer3);
7154  clReleaseEvent(gpuExec);
7155  free(result);
7156  }
7157 
7158  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
7159  size_t sz = v->size();
7160  size_t sz2 = v2->size();
7161  size_t sz3 = v3->size();
7162  size_t typesz = sizeof(char) * sz;
7163  size_t typesz2 = sizeof(int) * sz2;
7164  size_t typesz3 = sizeof(int) * sz3;
7165  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7166  if (temp_sz > 0){
7167  if (temp_sz > 2){
7168  if (temp_sz > 3){
7169  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7170  }
7171  typesz = sizeof(char) * params->buffers_size[0];
7172  typesz2 = sizeof(int) * params->buffers_size[1];
7173  typesz3 = sizeof(int) * params->buffers_size[2];
7174  }
7175  else{
7176  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7177  }
7178  }
7179  cl_int error = 0;
7180  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7181  checkError(error);
7182  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7183  checkError(error);
7184  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
7185  checkError(error);
7186  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7187  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7188  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7189  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7190 
7191  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7192  checkError (error);
7193  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7194  checkError (error);
7195  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
7196  checkError (error);
7197 
7198  size_t size[3] = {sz, sz2, sz3};
7199  size_t work_dimension = 3;
7200 
7201  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7202  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7203  work_dimension = 1;
7204  }
7205  else if(temp_sz > 0){
7206  if (params->multi_dimensional){
7207  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7208  For default multidimensional global work size, leave the global_work_size vector empty, \
7209  and set multi_dimensional to true. Setting the global work size based on the values inside \
7210  the global_work_size vector.");
7211  }
7212  if (temp_sz == 1){
7213  size[0] = params->global_work_size[0];
7214  work_dimension = 1;
7215  }
7216  else if (temp_sz == 2){
7217  size[0] = params->global_work_size[0];
7218  size[1] = params->global_work_size[1];
7219  work_dimension = 2;
7220  }
7221  else{
7222  size[0] = params->global_work_size[0];
7223  size[1] = params->global_work_size[1];
7224  size[2] = params->global_work_size[2];
7225  if (temp_sz > 3){
7226  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7227  }
7228  }
7229  }
7230 
7231  cl_event gpuExec;
7232 
7233  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7234 
7235  clWaitForEvents(1, &gpuExec);
7236 
7237  char *result = (char *) malloc(typesz);
7238  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7239 
7240  v->assign(result, result+sz);
7241 
7242  if (typesz2 != typesz or sz != sz2){
7243  int *result2;
7244  result2 = (int *) malloc(typesz2);
7245  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7246 
7247  v2->assign(result2, result2+sz2);
7248  free(result2);
7249  }
7250  else{
7251  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7252 
7253  v2->assign(result, result+sz2);
7254  }
7255 
7256  if (typesz3 != typesz or sz != sz3){
7257  int *result3;
7258  result3 = (int *) malloc(typesz3);
7259  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
7260 
7261  v3->assign(result3, result3+sz3);
7262  free(result3);
7263  }
7264  else{
7265  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7266 
7267  v3->assign(result, result+sz3);
7268  }
7269 
7270  clReleaseCommandQueue (queue);
7271  clReleaseMemObject(buffer);
7272  clReleaseMemObject(buffer2);
7273  clReleaseMemObject(buffer3);
7274  clReleaseEvent(gpuExec);
7275  free(result);
7276  }
7277 
7278 
7279  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
7280  size_t sz = v.size();
7281  size_t sz2 = v2.size();
7282  size_t sz3 = v3.size();
7283  size_t typesz = sizeof(char) * sz;
7284  size_t typesz2 = sizeof(int) * sz2;
7285  size_t typesz3 = sizeof(float) * sz3;
7286  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7287  if (temp_sz > 0){
7288  if (temp_sz > 2){
7289  if (temp_sz > 3){
7290  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7291  }
7292  typesz = sizeof(char) * params->buffers_size[0];
7293  typesz2 = sizeof(int) * params->buffers_size[1];
7294  typesz3 = sizeof(float) * params->buffers_size[2];
7295  }
7296  else{
7297  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7298  }
7299  }
7300  cl_int error = 0;
7301  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7302  checkError(error);
7303  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7304  checkError(error);
7305  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7306  checkError(error);
7307  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7308  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7309  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7310  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7311 
7312  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
7313  checkError (error);
7314  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7315  checkError (error);
7316  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7317  checkError (error);
7318 
7319  size_t size[3] = {sz, sz2, sz3};
7320  size_t work_dimension = 3;
7321 
7322  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7323  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7324  work_dimension = 1;
7325  }
7326  else if(temp_sz > 0){
7327  if (params->multi_dimensional){
7328  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7329  For default multidimensional global work size, leave the global_work_size vector empty, \
7330  and set multi_dimensional to true. Setting the global work size based on the values inside \
7331  the global_work_size vector.");
7332  }
7333  if (temp_sz == 1){
7334  size[0] = params->global_work_size[0];
7335  work_dimension = 1;
7336  }
7337  else if (temp_sz == 2){
7338  size[0] = params->global_work_size[0];
7339  size[1] = params->global_work_size[1];
7340  work_dimension = 2;
7341  }
7342  else{
7343  size[0] = params->global_work_size[0];
7344  size[1] = params->global_work_size[1];
7345  size[2] = params->global_work_size[2];
7346  if (temp_sz > 3){
7347  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7348  }
7349  }
7350  }
7351 
7352  cl_event gpuExec;
7353 
7354  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7355 
7356  clWaitForEvents(1, &gpuExec);
7357 
7358  char *result = (char *) malloc(typesz);
7359  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7360 
7361  std::vector<char> res = std::vector<char>();
7362  res.assign(result, result+sz);
7363 
7364  clReleaseCommandQueue (queue);
7365  clReleaseMemObject(buffer);
7366  clReleaseMemObject(buffer2);
7367  clReleaseMemObject(buffer3);
7368  clReleaseEvent(gpuExec);
7369  free(result);
7370 
7371  return res;
7372  }
7373 
7374  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
7375  size_t sz = v->size();
7376  size_t sz2 = v2.size();
7377  size_t sz3 = v3.size();
7378  size_t typesz = sizeof(char) * sz;
7379  size_t typesz2 = sizeof(int) * sz2;
7380  size_t typesz3 = sizeof(float) * sz3;
7381  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7382  if (temp_sz > 0){
7383  if (temp_sz > 2){
7384  if (temp_sz > 3){
7385  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7386  }
7387  typesz = sizeof(char) * params->buffers_size[0];
7388  typesz2 = sizeof(int) * params->buffers_size[1];
7389  typesz3 = sizeof(float) * params->buffers_size[2];
7390  }
7391  else{
7392  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7393  }
7394  }
7395  cl_int error = 0;
7396  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7397  checkError(error);
7398  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7399  checkError(error);
7400  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7401  checkError(error);
7402  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7403  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7404  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7405  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7406 
7407  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7408  checkError (error);
7409  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7410  checkError (error);
7411  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7412  checkError (error);
7413 
7414  size_t size[3] = {sz, sz2, sz3};
7415  size_t work_dimension = 3;
7416 
7417  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7418  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7419  work_dimension = 1;
7420  }
7421  else if(temp_sz > 0){
7422  if (params->multi_dimensional){
7423  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7424  For default multidimensional global work size, leave the global_work_size vector empty, \
7425  and set multi_dimensional to true. Setting the global work size based on the values inside \
7426  the global_work_size vector.");
7427  }
7428  if (temp_sz == 1){
7429  size[0] = params->global_work_size[0];
7430  work_dimension = 1;
7431  }
7432  else if (temp_sz == 2){
7433  size[0] = params->global_work_size[0];
7434  size[1] = params->global_work_size[1];
7435  work_dimension = 2;
7436  }
7437  else{
7438  size[0] = params->global_work_size[0];
7439  size[1] = params->global_work_size[1];
7440  size[2] = params->global_work_size[2];
7441  if (temp_sz > 3){
7442  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7443  }
7444  }
7445  }
7446 
7447  cl_event gpuExec;
7448 
7449  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7450 
7451  clWaitForEvents(1, &gpuExec);
7452 
7453  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7454 
7455  clWaitForEvents(1, &gpuExec);
7456 
7457  char *result = (char *) malloc(typesz);
7458  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7459 
7460  v->assign(result, result+sz);
7461 
7462  clReleaseCommandQueue (queue);
7463  clReleaseMemObject(buffer);
7464  clReleaseMemObject(buffer2);
7465  clReleaseMemObject(buffer3);
7466  clReleaseEvent(gpuExec);
7467  free(result);
7468  }
7469 
7470  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
7471  size_t sz = v->size();
7472  size_t sz2 = v2->size();
7473  size_t sz3 = v3.size();
7474  size_t typesz = sizeof(char) * sz;
7475  size_t typesz2 = sizeof(int) * sz2;
7476  size_t typesz3 = sizeof(float) * sz3;
7477  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7478  if (temp_sz > 0){
7479  if (temp_sz > 2){
7480  if (temp_sz > 3){
7481  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7482  }
7483  typesz = sizeof(char) * params->buffers_size[0];
7484  typesz2 = sizeof(int) * params->buffers_size[1];
7485  typesz3 = sizeof(float) * params->buffers_size[2];
7486  }
7487  else{
7488  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7489  }
7490  }
7491  cl_int error = 0;
7492  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7493  checkError(error);
7494  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7495  checkError(error);
7496  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7497  checkError(error);
7498  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7499  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7500  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7501  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7502 
7503  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7504  checkError (error);
7505  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7506  checkError (error);
7507  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7508  checkError (error);
7509 
7510  size_t size[3] = {sz, sz2, sz3};
7511  size_t work_dimension = 3;
7512 
7513  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7514  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7515  work_dimension = 1;
7516  }
7517  else if(temp_sz > 0){
7518  if (params->multi_dimensional){
7519  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7520  For default multidimensional global work size, leave the global_work_size vector empty, \
7521  and set multi_dimensional to true. Setting the global work size based on the values inside \
7522  the global_work_size vector.");
7523  }
7524  if (temp_sz == 1){
7525  size[0] = params->global_work_size[0];
7526  work_dimension = 1;
7527  }
7528  else if (temp_sz == 2){
7529  size[0] = params->global_work_size[0];
7530  size[1] = params->global_work_size[1];
7531  work_dimension = 2;
7532  }
7533  else{
7534  size[0] = params->global_work_size[0];
7535  size[1] = params->global_work_size[1];
7536  size[2] = params->global_work_size[2];
7537  if (temp_sz > 3){
7538  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7539  }
7540  }
7541  }
7542 
7543  cl_event gpuExec;
7544 
7545  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7546 
7547  clWaitForEvents(1, &gpuExec);
7548 
7549  char *result = (char *) malloc(typesz);
7550  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7551 
7552  v->assign(result, result+sz);
7553 
7554  if (typesz2 != typesz or sz != sz2){
7555  int *result2;
7556  result2 = (int *) malloc(typesz2);
7557  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7558 
7559  v2->assign(result2, result2+sz2);
7560  free(result2);
7561  }
7562  else{
7563  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7564 
7565  v2->assign(result, result+sz2);
7566  }
7567 
7568  clReleaseCommandQueue (queue);
7569  clReleaseMemObject(buffer);
7570  clReleaseMemObject(buffer2);
7571  clReleaseMemObject(buffer3);
7572  clReleaseEvent(gpuExec);
7573  free(result);
7574  }
7575 
7576  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
7577  size_t sz = v->size();
7578  size_t sz2 = v2->size();
7579  size_t sz3 = v3->size();
7580  size_t typesz = sizeof(char) * sz;
7581  size_t typesz2 = sizeof(int) * sz2;
7582  size_t typesz3 = sizeof(float) * sz3;
7583  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7584  if (temp_sz > 0){
7585  if (temp_sz > 2){
7586  if (temp_sz > 3){
7587  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7588  }
7589  typesz = sizeof(char) * params->buffers_size[0];
7590  typesz2 = sizeof(int) * params->buffers_size[1];
7591  typesz3 = sizeof(float) * params->buffers_size[2];
7592  }
7593  else{
7594  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7595  }
7596  }
7597  cl_int error = 0;
7598  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7599  checkError(error);
7600  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7601  checkError(error);
7602  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
7603  checkError(error);
7604  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7605  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7606  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7607  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7608 
7609  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7610  checkError (error);
7611  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7612  checkError (error);
7613  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
7614  checkError (error);
7615 
7616  size_t size[3] = {sz, sz2, sz3};
7617  size_t work_dimension = 3;
7618 
7619  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7620  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7621  work_dimension = 1;
7622  }
7623  else if(temp_sz > 0){
7624  if (params->multi_dimensional){
7625  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7626  For default multidimensional global work size, leave the global_work_size vector empty, \
7627  and set multi_dimensional to true. Setting the global work size based on the values inside \
7628  the global_work_size vector.");
7629  }
7630  if (temp_sz == 1){
7631  size[0] = params->global_work_size[0];
7632  work_dimension = 1;
7633  }
7634  else if (temp_sz == 2){
7635  size[0] = params->global_work_size[0];
7636  size[1] = params->global_work_size[1];
7637  work_dimension = 2;
7638  }
7639  else{
7640  size[0] = params->global_work_size[0];
7641  size[1] = params->global_work_size[1];
7642  size[2] = params->global_work_size[2];
7643  if (temp_sz > 3){
7644  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7645  }
7646  }
7647  }
7648 
7649  cl_event gpuExec;
7650 
7651  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7652 
7653  clWaitForEvents(1, &gpuExec);
7654 
7655  char *result = (char *) malloc(typesz);
7656  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7657 
7658  v->assign(result, result+sz);
7659 
7660  if (typesz2 != typesz or sz != sz2){
7661  int *result2;
7662  result2 = (int *) malloc(typesz2);
7663  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7664 
7665  v2->assign(result2, result2+sz2);
7666  free(result2);
7667  }
7668  else{
7669  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7670 
7671  v2->assign(result, result+sz2);
7672  }
7673 
7674  if (typesz3 != typesz or sz != sz3){
7675  float *result3;
7676  result3 = (float *) malloc(typesz3);
7677  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
7678 
7679  v3->assign(result3, result3+sz3);
7680  free(result3);
7681  }
7682  else{
7683  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7684 
7685  v3->assign(result, result+sz3);
7686  }
7687 
7688  clReleaseCommandQueue (queue);
7689  clReleaseMemObject(buffer);
7690  clReleaseMemObject(buffer2);
7691  clReleaseMemObject(buffer3);
7692  clReleaseEvent(gpuExec);
7693  free(result);
7694  }
7695 
7696 
7697  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
7698  size_t sz = v.size();
7699  size_t sz2 = v2.size();
7700  size_t sz3 = v3.size();
7701  size_t typesz = sizeof(char) * sz;
7702  size_t typesz2 = sizeof(int) * sz2;
7703  size_t typesz3 = sizeof(double) * sz3;
7704  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7705  if (temp_sz > 0){
7706  if (temp_sz > 2){
7707  if (temp_sz > 3){
7708  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7709  }
7710  typesz = sizeof(char) * params->buffers_size[0];
7711  typesz2 = sizeof(int) * params->buffers_size[1];
7712  typesz3 = sizeof(double) * params->buffers_size[2];
7713  }
7714  else{
7715  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7716  }
7717  }
7718  cl_int error = 0;
7719  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7720  checkError(error);
7721  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7722  checkError(error);
7723  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7724  checkError(error);
7725  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7726  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7727  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7728  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7729 
7730  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
7731  checkError (error);
7732  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7733  checkError (error);
7734  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7735  checkError (error);
7736 
7737  size_t size[3] = {sz, sz2, sz3};
7738  size_t work_dimension = 3;
7739 
7740  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7741  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7742  work_dimension = 1;
7743  }
7744  else if(temp_sz > 0){
7745  if (params->multi_dimensional){
7746  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7747  For default multidimensional global work size, leave the global_work_size vector empty, \
7748  and set multi_dimensional to true. Setting the global work size based on the values inside \
7749  the global_work_size vector.");
7750  }
7751  if (temp_sz == 1){
7752  size[0] = params->global_work_size[0];
7753  work_dimension = 1;
7754  }
7755  else if (temp_sz == 2){
7756  size[0] = params->global_work_size[0];
7757  size[1] = params->global_work_size[1];
7758  work_dimension = 2;
7759  }
7760  else{
7761  size[0] = params->global_work_size[0];
7762  size[1] = params->global_work_size[1];
7763  size[2] = params->global_work_size[2];
7764  if (temp_sz > 3){
7765  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7766  }
7767  }
7768  }
7769 
7770  cl_event gpuExec;
7771 
7772  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7773 
7774  clWaitForEvents(1, &gpuExec);
7775 
7776  char *result = (char *) malloc(typesz);
7777  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7778 
7779  std::vector<char> res = std::vector<char>();
7780  res.assign(result, result+sz);
7781 
7782  clReleaseCommandQueue (queue);
7783  clReleaseMemObject(buffer);
7784  clReleaseMemObject(buffer2);
7785  clReleaseMemObject(buffer3);
7786  clReleaseEvent(gpuExec);
7787  free(result);
7788 
7789  return res;
7790  }
7791 
7792  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
7793  size_t sz = v->size();
7794  size_t sz2 = v2.size();
7795  size_t sz3 = v3.size();
7796  size_t typesz = sizeof(char) * sz;
7797  size_t typesz2 = sizeof(int) * sz2;
7798  size_t typesz3 = sizeof(double) * sz3;
7799  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7800  if (temp_sz > 0){
7801  if (temp_sz > 2){
7802  if (temp_sz > 3){
7803  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7804  }
7805  typesz = sizeof(char) * params->buffers_size[0];
7806  typesz2 = sizeof(int) * params->buffers_size[1];
7807  typesz3 = sizeof(double) * params->buffers_size[2];
7808  }
7809  else{
7810  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7811  }
7812  }
7813  cl_int error = 0;
7814  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7815  checkError(error);
7816  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7817  checkError(error);
7818  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7819  checkError(error);
7820  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7821  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7822  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7823  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7824 
7825  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7826  checkError (error);
7827  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7828  checkError (error);
7829  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7830  checkError (error);
7831 
7832  size_t size[3] = {sz, sz2, sz3};
7833  size_t work_dimension = 3;
7834 
7835  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7836  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7837  work_dimension = 1;
7838  }
7839  else if(temp_sz > 0){
7840  if (params->multi_dimensional){
7841  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7842  For default multidimensional global work size, leave the global_work_size vector empty, \
7843  and set multi_dimensional to true. Setting the global work size based on the values inside \
7844  the global_work_size vector.");
7845  }
7846  if (temp_sz == 1){
7847  size[0] = params->global_work_size[0];
7848  work_dimension = 1;
7849  }
7850  else if (temp_sz == 2){
7851  size[0] = params->global_work_size[0];
7852  size[1] = params->global_work_size[1];
7853  work_dimension = 2;
7854  }
7855  else{
7856  size[0] = params->global_work_size[0];
7857  size[1] = params->global_work_size[1];
7858  size[2] = params->global_work_size[2];
7859  if (temp_sz > 3){
7860  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7861  }
7862  }
7863  }
7864 
7865  cl_event gpuExec;
7866 
7867  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7868 
7869  clWaitForEvents(1, &gpuExec);
7870 
7871  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7872 
7873  clWaitForEvents(1, &gpuExec);
7874 
7875  char *result = (char *) malloc(typesz);
7876  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7877 
7878  v->assign(result, result+sz);
7879 
7880  clReleaseCommandQueue (queue);
7881  clReleaseMemObject(buffer);
7882  clReleaseMemObject(buffer2);
7883  clReleaseMemObject(buffer3);
7884  clReleaseEvent(gpuExec);
7885  free(result);
7886  }
7887 
7888  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
7889  size_t sz = v->size();
7890  size_t sz2 = v2->size();
7891  size_t sz3 = v3.size();
7892  size_t typesz = sizeof(char) * sz;
7893  size_t typesz2 = sizeof(int) * sz2;
7894  size_t typesz3 = sizeof(double) * sz3;
7895  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
7896  if (temp_sz > 0){
7897  if (temp_sz > 2){
7898  if (temp_sz > 3){
7899  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7900  }
7901  typesz = sizeof(char) * params->buffers_size[0];
7902  typesz2 = sizeof(int) * params->buffers_size[1];
7903  typesz3 = sizeof(double) * params->buffers_size[2];
7904  }
7905  else{
7906  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7907  }
7908  }
7909  cl_int error = 0;
7910  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7911  checkError(error);
7912  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7913  checkError(error);
7914  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7915  checkError(error);
7916  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
7917  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
7918  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
7919  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
7920 
7921  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7922  checkError (error);
7923  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7924  checkError (error);
7925  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7926  checkError (error);
7927 
7928  size_t size[3] = {sz, sz2, sz3};
7929  size_t work_dimension = 3;
7930 
7931  temp_sz = params != NULL ? params->global_work_size.size() : 0;
7932  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
7933  work_dimension = 1;
7934  }
7935  else if(temp_sz > 0){
7936  if (params->multi_dimensional){
7937  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
7938  For default multidimensional global work size, leave the global_work_size vector empty, \
7939  and set multi_dimensional to true. Setting the global work size based on the values inside \
7940  the global_work_size vector.");
7941  }
7942  if (temp_sz == 1){
7943  size[0] = params->global_work_size[0];
7944  work_dimension = 1;
7945  }
7946  else if (temp_sz == 2){
7947  size[0] = params->global_work_size[0];
7948  size[1] = params->global_work_size[1];
7949  work_dimension = 2;
7950  }
7951  else{
7952  size[0] = params->global_work_size[0];
7953  size[1] = params->global_work_size[1];
7954  size[2] = params->global_work_size[2];
7955  if (temp_sz > 3){
7956  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7957  }
7958  }
7959  }
7960 
7961  cl_event gpuExec;
7962 
7963  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7964 
7965  clWaitForEvents(1, &gpuExec);
7966 
7967  char *result = (char *) malloc(typesz);
7968  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7969 
7970  v->assign(result, result+sz);
7971 
7972  if (typesz2 != typesz or sz != sz2){
7973  int *result2;
7974  result2 = (int *) malloc(typesz2);
7975  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7976 
7977  v2->assign(result2, result2+sz2);
7978  free(result2);
7979  }
7980  else{
7981  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7982 
7983  v2->assign(result, result+sz2);
7984  }
7985 
7986  clReleaseCommandQueue (queue);
7987  clReleaseMemObject(buffer);
7988  clReleaseMemObject(buffer2);
7989  clReleaseMemObject(buffer3);
7990  clReleaseEvent(gpuExec);
7991  free(result);
7992  }
7993 
7994  void ROS_OpenCL::process(std::vector<char>* v, std::vector<int>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
7995  size_t sz = v->size();
7996  size_t sz2 = v2->size();
7997  size_t sz3 = v3->size();
7998  size_t typesz = sizeof(char) * sz;
7999  size_t typesz2 = sizeof(int) * sz2;
8000  size_t typesz3 = sizeof(double) * sz3;
8001  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8002  if (temp_sz > 0){
8003  if (temp_sz > 2){
8004  if (temp_sz > 3){
8005  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8006  }
8007  typesz = sizeof(char) * params->buffers_size[0];
8008  typesz2 = sizeof(int) * params->buffers_size[1];
8009  typesz3 = sizeof(double) * params->buffers_size[2];
8010  }
8011  else{
8012  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8013  }
8014  }
8015  cl_int error = 0;
8016  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8017  checkError(error);
8018  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8019  checkError(error);
8020  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
8021  checkError(error);
8022  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8023  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8024  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8025  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8026 
8027  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8028  checkError (error);
8029  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8030  checkError (error);
8031  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
8032  checkError (error);
8033 
8034  size_t size[3] = {sz, sz2, sz3};
8035  size_t work_dimension = 3;
8036 
8037  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8038  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8039  work_dimension = 1;
8040  }
8041  else if(temp_sz > 0){
8042  if (params->multi_dimensional){
8043  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8044  For default multidimensional global work size, leave the global_work_size vector empty, \
8045  and set multi_dimensional to true. Setting the global work size based on the values inside \
8046  the global_work_size vector.");
8047  }
8048  if (temp_sz == 1){
8049  size[0] = params->global_work_size[0];
8050  work_dimension = 1;
8051  }
8052  else if (temp_sz == 2){
8053  size[0] = params->global_work_size[0];
8054  size[1] = params->global_work_size[1];
8055  work_dimension = 2;
8056  }
8057  else{
8058  size[0] = params->global_work_size[0];
8059  size[1] = params->global_work_size[1];
8060  size[2] = params->global_work_size[2];
8061  if (temp_sz > 3){
8062  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8063  }
8064  }
8065  }
8066 
8067  cl_event gpuExec;
8068 
8069  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8070 
8071  clWaitForEvents(1, &gpuExec);
8072 
8073  char *result = (char *) malloc(typesz);
8074  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8075 
8076  v->assign(result, result+sz);
8077 
8078  if (typesz2 != typesz or sz != sz2){
8079  int *result2;
8080  result2 = (int *) malloc(typesz2);
8081  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8082 
8083  v2->assign(result2, result2+sz2);
8084  free(result2);
8085  }
8086  else{
8087  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8088 
8089  v2->assign(result, result+sz2);
8090  }
8091 
8092  if (typesz3 != typesz or sz != sz3){
8093  double *result3;
8094  result3 = (double *) malloc(typesz3);
8095  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8096 
8097  v3->assign(result3, result3+sz3);
8098  free(result3);
8099  }
8100  else{
8101  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8102 
8103  v3->assign(result, result+sz3);
8104  }
8105 
8106  clReleaseCommandQueue (queue);
8107  clReleaseMemObject(buffer);
8108  clReleaseMemObject(buffer2);
8109  clReleaseMemObject(buffer3);
8110  clReleaseEvent(gpuExec);
8111  free(result);
8112  }
8113 
8114 
8115  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
8116  size_t sz = v.size();
8117  size_t sz2 = v2.size();
8118  size_t sz3 = v3.size();
8119  size_t typesz = sizeof(char) * sz;
8120  size_t typesz2 = sizeof(float) * sz2;
8121  size_t typesz3 = sizeof(char) * sz3;
8122  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8123  if (temp_sz > 0){
8124  if (temp_sz > 2){
8125  if (temp_sz > 3){
8126  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8127  }
8128  typesz = sizeof(char) * params->buffers_size[0];
8129  typesz2 = sizeof(float) * params->buffers_size[1];
8130  typesz3 = sizeof(char) * params->buffers_size[2];
8131  }
8132  else{
8133  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8134  }
8135  }
8136  cl_int error = 0;
8137  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8138  checkError(error);
8139  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8140  checkError(error);
8141  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8142  checkError(error);
8143  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8144  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8145  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8146  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8147 
8148  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8149  checkError (error);
8150  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8151  checkError (error);
8152  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8153  checkError (error);
8154 
8155  size_t size[3] = {sz, sz2, sz3};
8156  size_t work_dimension = 3;
8157 
8158  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8159  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8160  work_dimension = 1;
8161  }
8162  else if(temp_sz > 0){
8163  if (params->multi_dimensional){
8164  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8165  For default multidimensional global work size, leave the global_work_size vector empty, \
8166  and set multi_dimensional to true. Setting the global work size based on the values inside \
8167  the global_work_size vector.");
8168  }
8169  if (temp_sz == 1){
8170  size[0] = params->global_work_size[0];
8171  work_dimension = 1;
8172  }
8173  else if (temp_sz == 2){
8174  size[0] = params->global_work_size[0];
8175  size[1] = params->global_work_size[1];
8176  work_dimension = 2;
8177  }
8178  else{
8179  size[0] = params->global_work_size[0];
8180  size[1] = params->global_work_size[1];
8181  size[2] = params->global_work_size[2];
8182  if (temp_sz > 3){
8183  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8184  }
8185  }
8186  }
8187 
8188  cl_event gpuExec;
8189 
8190  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8191 
8192  clWaitForEvents(1, &gpuExec);
8193 
8194  char *result = (char *) malloc(typesz);
8195  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8196 
8197  std::vector<char> res = std::vector<char>();
8198  res.assign(result, result+sz);
8199 
8200  clReleaseCommandQueue (queue);
8201  clReleaseMemObject(buffer);
8202  clReleaseMemObject(buffer2);
8203  clReleaseMemObject(buffer3);
8204  clReleaseEvent(gpuExec);
8205  free(result);
8206 
8207  return res;
8208  }
8209 
8210  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
8211  size_t sz = v->size();
8212  size_t sz2 = v2.size();
8213  size_t sz3 = v3.size();
8214  size_t typesz = sizeof(char) * sz;
8215  size_t typesz2 = sizeof(float) * sz2;
8216  size_t typesz3 = sizeof(char) * sz3;
8217  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8218  if (temp_sz > 0){
8219  if (temp_sz > 2){
8220  if (temp_sz > 3){
8221  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8222  }
8223  typesz = sizeof(char) * params->buffers_size[0];
8224  typesz2 = sizeof(float) * params->buffers_size[1];
8225  typesz3 = sizeof(char) * params->buffers_size[2];
8226  }
8227  else{
8228  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8229  }
8230  }
8231  cl_int error = 0;
8232  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8233  checkError(error);
8234  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8235  checkError(error);
8236  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8237  checkError(error);
8238  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8239  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8240  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8241  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8242 
8243  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8244  checkError (error);
8245  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8246  checkError (error);
8247  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8248  checkError (error);
8249 
8250  size_t size[3] = {sz, sz2, sz3};
8251  size_t work_dimension = 3;
8252 
8253  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8254  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8255  work_dimension = 1;
8256  }
8257  else if(temp_sz > 0){
8258  if (params->multi_dimensional){
8259  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8260  For default multidimensional global work size, leave the global_work_size vector empty, \
8261  and set multi_dimensional to true. Setting the global work size based on the values inside \
8262  the global_work_size vector.");
8263  }
8264  if (temp_sz == 1){
8265  size[0] = params->global_work_size[0];
8266  work_dimension = 1;
8267  }
8268  else if (temp_sz == 2){
8269  size[0] = params->global_work_size[0];
8270  size[1] = params->global_work_size[1];
8271  work_dimension = 2;
8272  }
8273  else{
8274  size[0] = params->global_work_size[0];
8275  size[1] = params->global_work_size[1];
8276  size[2] = params->global_work_size[2];
8277  if (temp_sz > 3){
8278  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8279  }
8280  }
8281  }
8282 
8283  cl_event gpuExec;
8284 
8285  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8286 
8287  clWaitForEvents(1, &gpuExec);
8288 
8289  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8290 
8291  clWaitForEvents(1, &gpuExec);
8292 
8293  char *result = (char *) malloc(typesz);
8294  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8295 
8296  v->assign(result, result+sz);
8297 
8298  clReleaseCommandQueue (queue);
8299  clReleaseMemObject(buffer);
8300  clReleaseMemObject(buffer2);
8301  clReleaseMemObject(buffer3);
8302  clReleaseEvent(gpuExec);
8303  free(result);
8304  }
8305 
8306  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
8307  size_t sz = v->size();
8308  size_t sz2 = v2->size();
8309  size_t sz3 = v3.size();
8310  size_t typesz = sizeof(char) * sz;
8311  size_t typesz2 = sizeof(float) * sz2;
8312  size_t typesz3 = sizeof(char) * sz3;
8313  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8314  if (temp_sz > 0){
8315  if (temp_sz > 2){
8316  if (temp_sz > 3){
8317  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8318  }
8319  typesz = sizeof(char) * params->buffers_size[0];
8320  typesz2 = sizeof(float) * params->buffers_size[1];
8321  typesz3 = sizeof(char) * params->buffers_size[2];
8322  }
8323  else{
8324  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8325  }
8326  }
8327  cl_int error = 0;
8328  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8329  checkError(error);
8330  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8331  checkError(error);
8332  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8333  checkError(error);
8334  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8335  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8336  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8337  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8338 
8339  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8340  checkError (error);
8341  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8342  checkError (error);
8343  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8344  checkError (error);
8345 
8346  size_t size[3] = {sz, sz2, sz3};
8347  size_t work_dimension = 3;
8348 
8349  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8350  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8351  work_dimension = 1;
8352  }
8353  else if(temp_sz > 0){
8354  if (params->multi_dimensional){
8355  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8356  For default multidimensional global work size, leave the global_work_size vector empty, \
8357  and set multi_dimensional to true. Setting the global work size based on the values inside \
8358  the global_work_size vector.");
8359  }
8360  if (temp_sz == 1){
8361  size[0] = params->global_work_size[0];
8362  work_dimension = 1;
8363  }
8364  else if (temp_sz == 2){
8365  size[0] = params->global_work_size[0];
8366  size[1] = params->global_work_size[1];
8367  work_dimension = 2;
8368  }
8369  else{
8370  size[0] = params->global_work_size[0];
8371  size[1] = params->global_work_size[1];
8372  size[2] = params->global_work_size[2];
8373  if (temp_sz > 3){
8374  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8375  }
8376  }
8377  }
8378 
8379  cl_event gpuExec;
8380 
8381  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8382 
8383  clWaitForEvents(1, &gpuExec);
8384 
8385  char *result = (char *) malloc(typesz);
8386  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8387 
8388  v->assign(result, result+sz);
8389 
8390  if (typesz2 != typesz or sz != sz2){
8391  float *result2;
8392  result2 = (float *) malloc(typesz2);
8393  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8394 
8395  v2->assign(result2, result2+sz2);
8396  free(result2);
8397  }
8398  else{
8399  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8400 
8401  v2->assign(result, result+sz2);
8402  }
8403 
8404  clReleaseCommandQueue (queue);
8405  clReleaseMemObject(buffer);
8406  clReleaseMemObject(buffer2);
8407  clReleaseMemObject(buffer3);
8408  clReleaseEvent(gpuExec);
8409  free(result);
8410  }
8411 
8412  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
8413  size_t sz = v->size();
8414  size_t sz2 = v2->size();
8415  size_t sz3 = v3->size();
8416  size_t typesz = sizeof(char) * sz;
8417  size_t typesz2 = sizeof(float) * sz2;
8418  size_t typesz3 = sizeof(char) * sz3;
8419  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8420  if (temp_sz > 0){
8421  if (temp_sz > 2){
8422  if (temp_sz > 3){
8423  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8424  }
8425  typesz = sizeof(char) * params->buffers_size[0];
8426  typesz2 = sizeof(float) * params->buffers_size[1];
8427  typesz3 = sizeof(char) * params->buffers_size[2];
8428  }
8429  else{
8430  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8431  }
8432  }
8433  cl_int error = 0;
8434  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8435  checkError(error);
8436  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8437  checkError(error);
8438  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
8439  checkError(error);
8440  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8441  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8442  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8443  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8444 
8445  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8446  checkError (error);
8447  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8448  checkError (error);
8449  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
8450  checkError (error);
8451 
8452  size_t size[3] = {sz, sz2, sz3};
8453  size_t work_dimension = 3;
8454 
8455  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8456  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8457  work_dimension = 1;
8458  }
8459  else if(temp_sz > 0){
8460  if (params->multi_dimensional){
8461  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8462  For default multidimensional global work size, leave the global_work_size vector empty, \
8463  and set multi_dimensional to true. Setting the global work size based on the values inside \
8464  the global_work_size vector.");
8465  }
8466  if (temp_sz == 1){
8467  size[0] = params->global_work_size[0];
8468  work_dimension = 1;
8469  }
8470  else if (temp_sz == 2){
8471  size[0] = params->global_work_size[0];
8472  size[1] = params->global_work_size[1];
8473  work_dimension = 2;
8474  }
8475  else{
8476  size[0] = params->global_work_size[0];
8477  size[1] = params->global_work_size[1];
8478  size[2] = params->global_work_size[2];
8479  if (temp_sz > 3){
8480  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8481  }
8482  }
8483  }
8484 
8485  cl_event gpuExec;
8486 
8487  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8488 
8489  clWaitForEvents(1, &gpuExec);
8490 
8491  char *result = (char *) malloc(typesz);
8492  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8493 
8494  v->assign(result, result+sz);
8495 
8496  if (typesz2 != typesz or sz != sz2){
8497  float *result2;
8498  result2 = (float *) malloc(typesz2);
8499  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8500 
8501  v2->assign(result2, result2+sz2);
8502  free(result2);
8503  }
8504  else{
8505  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8506 
8507  v2->assign(result, result+sz2);
8508  }
8509 
8510  if (typesz3 != typesz or sz != sz3){
8511  char *result3;
8512  result3 = (char *) malloc(typesz3);
8513  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8514 
8515  v3->assign(result3, result3+sz3);
8516  free(result3);
8517  }
8518  else{
8519  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8520 
8521  v3->assign(result, result+sz3);
8522  }
8523 
8524  clReleaseCommandQueue (queue);
8525  clReleaseMemObject(buffer);
8526  clReleaseMemObject(buffer2);
8527  clReleaseMemObject(buffer3);
8528  clReleaseEvent(gpuExec);
8529  free(result);
8530  }
8531 
8532 
8533  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
8534  size_t sz = v.size();
8535  size_t sz2 = v2.size();
8536  size_t sz3 = v3.size();
8537  size_t typesz = sizeof(char) * sz;
8538  size_t typesz2 = sizeof(float) * sz2;
8539  size_t typesz3 = sizeof(int) * sz3;
8540  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8541  if (temp_sz > 0){
8542  if (temp_sz > 2){
8543  if (temp_sz > 3){
8544  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8545  }
8546  typesz = sizeof(char) * params->buffers_size[0];
8547  typesz2 = sizeof(float) * params->buffers_size[1];
8548  typesz3 = sizeof(int) * params->buffers_size[2];
8549  }
8550  else{
8551  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8552  }
8553  }
8554  cl_int error = 0;
8555  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8556  checkError(error);
8557  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8558  checkError(error);
8559  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8560  checkError(error);
8561  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8562  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8563  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8564  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8565 
8566  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8567  checkError (error);
8568  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8569  checkError (error);
8570  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8571  checkError (error);
8572 
8573  size_t size[3] = {sz, sz2, sz3};
8574  size_t work_dimension = 3;
8575 
8576  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8577  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8578  work_dimension = 1;
8579  }
8580  else if(temp_sz > 0){
8581  if (params->multi_dimensional){
8582  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8583  For default multidimensional global work size, leave the global_work_size vector empty, \
8584  and set multi_dimensional to true. Setting the global work size based on the values inside \
8585  the global_work_size vector.");
8586  }
8587  if (temp_sz == 1){
8588  size[0] = params->global_work_size[0];
8589  work_dimension = 1;
8590  }
8591  else if (temp_sz == 2){
8592  size[0] = params->global_work_size[0];
8593  size[1] = params->global_work_size[1];
8594  work_dimension = 2;
8595  }
8596  else{
8597  size[0] = params->global_work_size[0];
8598  size[1] = params->global_work_size[1];
8599  size[2] = params->global_work_size[2];
8600  if (temp_sz > 3){
8601  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8602  }
8603  }
8604  }
8605 
8606  cl_event gpuExec;
8607 
8608  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8609 
8610  clWaitForEvents(1, &gpuExec);
8611 
8612  char *result = (char *) malloc(typesz);
8613  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8614 
8615  std::vector<char> res = std::vector<char>();
8616  res.assign(result, result+sz);
8617 
8618  clReleaseCommandQueue (queue);
8619  clReleaseMemObject(buffer);
8620  clReleaseMemObject(buffer2);
8621  clReleaseMemObject(buffer3);
8622  clReleaseEvent(gpuExec);
8623  free(result);
8624 
8625  return res;
8626  }
8627 
8628  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
8629  size_t sz = v->size();
8630  size_t sz2 = v2.size();
8631  size_t sz3 = v3.size();
8632  size_t typesz = sizeof(char) * sz;
8633  size_t typesz2 = sizeof(float) * sz2;
8634  size_t typesz3 = sizeof(int) * sz3;
8635  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8636  if (temp_sz > 0){
8637  if (temp_sz > 2){
8638  if (temp_sz > 3){
8639  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8640  }
8641  typesz = sizeof(char) * params->buffers_size[0];
8642  typesz2 = sizeof(float) * params->buffers_size[1];
8643  typesz3 = sizeof(int) * params->buffers_size[2];
8644  }
8645  else{
8646  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8647  }
8648  }
8649  cl_int error = 0;
8650  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8651  checkError(error);
8652  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8653  checkError(error);
8654  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8655  checkError(error);
8656  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8657  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8658  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8659  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8660 
8661  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8662  checkError (error);
8663  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8664  checkError (error);
8665  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8666  checkError (error);
8667 
8668  size_t size[3] = {sz, sz2, sz3};
8669  size_t work_dimension = 3;
8670 
8671  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8672  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8673  work_dimension = 1;
8674  }
8675  else if(temp_sz > 0){
8676  if (params->multi_dimensional){
8677  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8678  For default multidimensional global work size, leave the global_work_size vector empty, \
8679  and set multi_dimensional to true. Setting the global work size based on the values inside \
8680  the global_work_size vector.");
8681  }
8682  if (temp_sz == 1){
8683  size[0] = params->global_work_size[0];
8684  work_dimension = 1;
8685  }
8686  else if (temp_sz == 2){
8687  size[0] = params->global_work_size[0];
8688  size[1] = params->global_work_size[1];
8689  work_dimension = 2;
8690  }
8691  else{
8692  size[0] = params->global_work_size[0];
8693  size[1] = params->global_work_size[1];
8694  size[2] = params->global_work_size[2];
8695  if (temp_sz > 3){
8696  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8697  }
8698  }
8699  }
8700 
8701  cl_event gpuExec;
8702 
8703  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8704 
8705  clWaitForEvents(1, &gpuExec);
8706 
8707  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8708 
8709  clWaitForEvents(1, &gpuExec);
8710 
8711  char *result = (char *) malloc(typesz);
8712  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8713 
8714  v->assign(result, result+sz);
8715 
8716  clReleaseCommandQueue (queue);
8717  clReleaseMemObject(buffer);
8718  clReleaseMemObject(buffer2);
8719  clReleaseMemObject(buffer3);
8720  clReleaseEvent(gpuExec);
8721  free(result);
8722  }
8723 
8724  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
8725  size_t sz = v->size();
8726  size_t sz2 = v2->size();
8727  size_t sz3 = v3.size();
8728  size_t typesz = sizeof(char) * sz;
8729  size_t typesz2 = sizeof(float) * sz2;
8730  size_t typesz3 = sizeof(int) * sz3;
8731  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8732  if (temp_sz > 0){
8733  if (temp_sz > 2){
8734  if (temp_sz > 3){
8735  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8736  }
8737  typesz = sizeof(char) * params->buffers_size[0];
8738  typesz2 = sizeof(float) * params->buffers_size[1];
8739  typesz3 = sizeof(int) * params->buffers_size[2];
8740  }
8741  else{
8742  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8743  }
8744  }
8745  cl_int error = 0;
8746  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8747  checkError(error);
8748  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8749  checkError(error);
8750  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8751  checkError(error);
8752  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8753  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8754  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8755  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8756 
8757  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8758  checkError (error);
8759  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8760  checkError (error);
8761  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8762  checkError (error);
8763 
8764  size_t size[3] = {sz, sz2, sz3};
8765  size_t work_dimension = 3;
8766 
8767  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8768  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8769  work_dimension = 1;
8770  }
8771  else if(temp_sz > 0){
8772  if (params->multi_dimensional){
8773  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8774  For default multidimensional global work size, leave the global_work_size vector empty, \
8775  and set multi_dimensional to true. Setting the global work size based on the values inside \
8776  the global_work_size vector.");
8777  }
8778  if (temp_sz == 1){
8779  size[0] = params->global_work_size[0];
8780  work_dimension = 1;
8781  }
8782  else if (temp_sz == 2){
8783  size[0] = params->global_work_size[0];
8784  size[1] = params->global_work_size[1];
8785  work_dimension = 2;
8786  }
8787  else{
8788  size[0] = params->global_work_size[0];
8789  size[1] = params->global_work_size[1];
8790  size[2] = params->global_work_size[2];
8791  if (temp_sz > 3){
8792  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8793  }
8794  }
8795  }
8796 
8797  cl_event gpuExec;
8798 
8799  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8800 
8801  clWaitForEvents(1, &gpuExec);
8802 
8803  char *result = (char *) malloc(typesz);
8804  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8805 
8806  v->assign(result, result+sz);
8807 
8808  if (typesz2 != typesz or sz != sz2){
8809  float *result2;
8810  result2 = (float *) malloc(typesz2);
8811  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8812 
8813  v2->assign(result2, result2+sz2);
8814  free(result2);
8815  }
8816  else{
8817  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8818 
8819  v2->assign(result, result+sz2);
8820  }
8821 
8822  clReleaseCommandQueue (queue);
8823  clReleaseMemObject(buffer);
8824  clReleaseMemObject(buffer2);
8825  clReleaseMemObject(buffer3);
8826  clReleaseEvent(gpuExec);
8827  free(result);
8828  }
8829 
8830  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
8831  size_t sz = v->size();
8832  size_t sz2 = v2->size();
8833  size_t sz3 = v3->size();
8834  size_t typesz = sizeof(char) * sz;
8835  size_t typesz2 = sizeof(float) * sz2;
8836  size_t typesz3 = sizeof(int) * sz3;
8837  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8838  if (temp_sz > 0){
8839  if (temp_sz > 2){
8840  if (temp_sz > 3){
8841  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8842  }
8843  typesz = sizeof(char) * params->buffers_size[0];
8844  typesz2 = sizeof(float) * params->buffers_size[1];
8845  typesz3 = sizeof(int) * params->buffers_size[2];
8846  }
8847  else{
8848  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8849  }
8850  }
8851  cl_int error = 0;
8852  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8853  checkError(error);
8854  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8855  checkError(error);
8856  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
8857  checkError(error);
8858  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8859  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8860  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8861  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8862 
8863  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8864  checkError (error);
8865  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8866  checkError (error);
8867  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
8868  checkError (error);
8869 
8870  size_t size[3] = {sz, sz2, sz3};
8871  size_t work_dimension = 3;
8872 
8873  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8874  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8875  work_dimension = 1;
8876  }
8877  else if(temp_sz > 0){
8878  if (params->multi_dimensional){
8879  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
8880  For default multidimensional global work size, leave the global_work_size vector empty, \
8881  and set multi_dimensional to true. Setting the global work size based on the values inside \
8882  the global_work_size vector.");
8883  }
8884  if (temp_sz == 1){
8885  size[0] = params->global_work_size[0];
8886  work_dimension = 1;
8887  }
8888  else if (temp_sz == 2){
8889  size[0] = params->global_work_size[0];
8890  size[1] = params->global_work_size[1];
8891  work_dimension = 2;
8892  }
8893  else{
8894  size[0] = params->global_work_size[0];
8895  size[1] = params->global_work_size[1];
8896  size[2] = params->global_work_size[2];
8897  if (temp_sz > 3){
8898  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8899  }
8900  }
8901  }
8902 
8903  cl_event gpuExec;
8904 
8905  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8906 
8907  clWaitForEvents(1, &gpuExec);
8908 
8909  char *result = (char *) malloc(typesz);
8910  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8911 
8912  v->assign(result, result+sz);
8913 
8914  if (typesz2 != typesz or sz != sz2){
8915  float *result2;
8916  result2 = (float *) malloc(typesz2);
8917  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8918 
8919  v2->assign(result2, result2+sz2);
8920  free(result2);
8921  }
8922  else{
8923  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8924 
8925  v2->assign(result, result+sz2);
8926  }
8927 
8928  if (typesz3 != typesz or sz != sz3){
8929  int *result3;
8930  result3 = (int *) malloc(typesz3);
8931  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8932 
8933  v3->assign(result3, result3+sz3);
8934  free(result3);
8935  }
8936  else{
8937  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8938 
8939  v3->assign(result, result+sz3);
8940  }
8941 
8942  clReleaseCommandQueue (queue);
8943  clReleaseMemObject(buffer);
8944  clReleaseMemObject(buffer2);
8945  clReleaseMemObject(buffer3);
8946  clReleaseEvent(gpuExec);
8947  free(result);
8948  }
8949 
8950 
8951  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
8952  size_t sz = v.size();
8953  size_t sz2 = v2.size();
8954  size_t sz3 = v3.size();
8955  size_t typesz = sizeof(char) * sz;
8956  size_t typesz2 = sizeof(float) * sz2;
8957  size_t typesz3 = sizeof(float) * sz3;
8958  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
8959  if (temp_sz > 0){
8960  if (temp_sz > 2){
8961  if (temp_sz > 3){
8962  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8963  }
8964  typesz = sizeof(char) * params->buffers_size[0];
8965  typesz2 = sizeof(float) * params->buffers_size[1];
8966  typesz3 = sizeof(float) * params->buffers_size[2];
8967  }
8968  else{
8969  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8970  }
8971  }
8972  cl_int error = 0;
8973  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8974  checkError(error);
8975  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8976  checkError(error);
8977  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8978  checkError(error);
8979  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
8980  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
8981  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
8982  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
8983 
8984  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8985  checkError (error);
8986  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8987  checkError (error);
8988  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8989  checkError (error);
8990 
8991  size_t size[3] = {sz, sz2, sz3};
8992  size_t work_dimension = 3;
8993 
8994  temp_sz = params != NULL ? params->global_work_size.size() : 0;
8995  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
8996  work_dimension = 1;
8997  }
8998  else if(temp_sz > 0){
8999  if (params->multi_dimensional){
9000  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9001  For default multidimensional global work size, leave the global_work_size vector empty, \
9002  and set multi_dimensional to true. Setting the global work size based on the values inside \
9003  the global_work_size vector.");
9004  }
9005  if (temp_sz == 1){
9006  size[0] = params->global_work_size[0];
9007  work_dimension = 1;
9008  }
9009  else if (temp_sz == 2){
9010  size[0] = params->global_work_size[0];
9011  size[1] = params->global_work_size[1];
9012  work_dimension = 2;
9013  }
9014  else{
9015  size[0] = params->global_work_size[0];
9016  size[1] = params->global_work_size[1];
9017  size[2] = params->global_work_size[2];
9018  if (temp_sz > 3){
9019  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9020  }
9021  }
9022  }
9023 
9024  cl_event gpuExec;
9025 
9026  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9027 
9028  clWaitForEvents(1, &gpuExec);
9029 
9030  char *result = (char *) malloc(typesz);
9031  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9032 
9033  std::vector<char> res = std::vector<char>();
9034  res.assign(result, result+sz);
9035 
9036  clReleaseCommandQueue (queue);
9037  clReleaseMemObject(buffer);
9038  clReleaseMemObject(buffer2);
9039  clReleaseMemObject(buffer3);
9040  clReleaseEvent(gpuExec);
9041  free(result);
9042 
9043  return res;
9044  }
9045 
9046  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
9047  size_t sz = v->size();
9048  size_t sz2 = v2.size();
9049  size_t sz3 = v3.size();
9050  size_t typesz = sizeof(char) * sz;
9051  size_t typesz2 = sizeof(float) * sz2;
9052  size_t typesz3 = sizeof(float) * sz3;
9053  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9054  if (temp_sz > 0){
9055  if (temp_sz > 2){
9056  if (temp_sz > 3){
9057  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9058  }
9059  typesz = sizeof(char) * params->buffers_size[0];
9060  typesz2 = sizeof(float) * params->buffers_size[1];
9061  typesz3 = sizeof(float) * params->buffers_size[2];
9062  }
9063  else{
9064  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9065  }
9066  }
9067  cl_int error = 0;
9068  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9069  checkError(error);
9070  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9071  checkError(error);
9072  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9073  checkError(error);
9074  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9075  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9076  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9077  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9078 
9079  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9080  checkError (error);
9081  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9082  checkError (error);
9083  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9084  checkError (error);
9085 
9086  size_t size[3] = {sz, sz2, sz3};
9087  size_t work_dimension = 3;
9088 
9089  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9090  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9091  work_dimension = 1;
9092  }
9093  else if(temp_sz > 0){
9094  if (params->multi_dimensional){
9095  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9096  For default multidimensional global work size, leave the global_work_size vector empty, \
9097  and set multi_dimensional to true. Setting the global work size based on the values inside \
9098  the global_work_size vector.");
9099  }
9100  if (temp_sz == 1){
9101  size[0] = params->global_work_size[0];
9102  work_dimension = 1;
9103  }
9104  else if (temp_sz == 2){
9105  size[0] = params->global_work_size[0];
9106  size[1] = params->global_work_size[1];
9107  work_dimension = 2;
9108  }
9109  else{
9110  size[0] = params->global_work_size[0];
9111  size[1] = params->global_work_size[1];
9112  size[2] = params->global_work_size[2];
9113  if (temp_sz > 3){
9114  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9115  }
9116  }
9117  }
9118 
9119  cl_event gpuExec;
9120 
9121  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9122 
9123  clWaitForEvents(1, &gpuExec);
9124 
9125  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9126 
9127  clWaitForEvents(1, &gpuExec);
9128 
9129  char *result = (char *) malloc(typesz);
9130  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9131 
9132  v->assign(result, result+sz);
9133 
9134  clReleaseCommandQueue (queue);
9135  clReleaseMemObject(buffer);
9136  clReleaseMemObject(buffer2);
9137  clReleaseMemObject(buffer3);
9138  clReleaseEvent(gpuExec);
9139  free(result);
9140  }
9141 
9142  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
9143  size_t sz = v->size();
9144  size_t sz2 = v2->size();
9145  size_t sz3 = v3.size();
9146  size_t typesz = sizeof(char) * sz;
9147  size_t typesz2 = sizeof(float) * sz2;
9148  size_t typesz3 = sizeof(float) * sz3;
9149  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9150  if (temp_sz > 0){
9151  if (temp_sz > 2){
9152  if (temp_sz > 3){
9153  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9154  }
9155  typesz = sizeof(char) * params->buffers_size[0];
9156  typesz2 = sizeof(float) * params->buffers_size[1];
9157  typesz3 = sizeof(float) * params->buffers_size[2];
9158  }
9159  else{
9160  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9161  }
9162  }
9163  cl_int error = 0;
9164  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9165  checkError(error);
9166  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9167  checkError(error);
9168  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9169  checkError(error);
9170  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9171  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9172  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9173  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9174 
9175  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9176  checkError (error);
9177  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9178  checkError (error);
9179  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9180  checkError (error);
9181 
9182  size_t size[3] = {sz, sz2, sz3};
9183  size_t work_dimension = 3;
9184 
9185  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9186  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9187  work_dimension = 1;
9188  }
9189  else if(temp_sz > 0){
9190  if (params->multi_dimensional){
9191  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9192  For default multidimensional global work size, leave the global_work_size vector empty, \
9193  and set multi_dimensional to true. Setting the global work size based on the values inside \
9194  the global_work_size vector.");
9195  }
9196  if (temp_sz == 1){
9197  size[0] = params->global_work_size[0];
9198  work_dimension = 1;
9199  }
9200  else if (temp_sz == 2){
9201  size[0] = params->global_work_size[0];
9202  size[1] = params->global_work_size[1];
9203  work_dimension = 2;
9204  }
9205  else{
9206  size[0] = params->global_work_size[0];
9207  size[1] = params->global_work_size[1];
9208  size[2] = params->global_work_size[2];
9209  if (temp_sz > 3){
9210  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9211  }
9212  }
9213  }
9214 
9215  cl_event gpuExec;
9216 
9217  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9218 
9219  clWaitForEvents(1, &gpuExec);
9220 
9221  char *result = (char *) malloc(typesz);
9222  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9223 
9224  v->assign(result, result+sz);
9225 
9226  if (typesz2 != typesz or sz != sz2){
9227  float *result2;
9228  result2 = (float *) malloc(typesz2);
9229  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9230 
9231  v2->assign(result2, result2+sz2);
9232  free(result2);
9233  }
9234  else{
9235  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9236 
9237  v2->assign(result, result+sz2);
9238  }
9239 
9240  clReleaseCommandQueue (queue);
9241  clReleaseMemObject(buffer);
9242  clReleaseMemObject(buffer2);
9243  clReleaseMemObject(buffer3);
9244  clReleaseEvent(gpuExec);
9245  free(result);
9246  }
9247 
9248  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
9249  size_t sz = v->size();
9250  size_t sz2 = v2->size();
9251  size_t sz3 = v3->size();
9252  size_t typesz = sizeof(char) * sz;
9253  size_t typesz2 = sizeof(float) * sz2;
9254  size_t typesz3 = sizeof(float) * sz3;
9255  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9256  if (temp_sz > 0){
9257  if (temp_sz > 2){
9258  if (temp_sz > 3){
9259  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9260  }
9261  typesz = sizeof(char) * params->buffers_size[0];
9262  typesz2 = sizeof(float) * params->buffers_size[1];
9263  typesz3 = sizeof(float) * params->buffers_size[2];
9264  }
9265  else{
9266  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9267  }
9268  }
9269  cl_int error = 0;
9270  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9271  checkError(error);
9272  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9273  checkError(error);
9274  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
9275  checkError(error);
9276  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9277  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9278  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9279  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9280 
9281  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9282  checkError (error);
9283  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9284  checkError (error);
9285  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
9286  checkError (error);
9287 
9288  size_t size[3] = {sz, sz2, sz3};
9289  size_t work_dimension = 3;
9290 
9291  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9292  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9293  work_dimension = 1;
9294  }
9295  else if(temp_sz > 0){
9296  if (params->multi_dimensional){
9297  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9298  For default multidimensional global work size, leave the global_work_size vector empty, \
9299  and set multi_dimensional to true. Setting the global work size based on the values inside \
9300  the global_work_size vector.");
9301  }
9302  if (temp_sz == 1){
9303  size[0] = params->global_work_size[0];
9304  work_dimension = 1;
9305  }
9306  else if (temp_sz == 2){
9307  size[0] = params->global_work_size[0];
9308  size[1] = params->global_work_size[1];
9309  work_dimension = 2;
9310  }
9311  else{
9312  size[0] = params->global_work_size[0];
9313  size[1] = params->global_work_size[1];
9314  size[2] = params->global_work_size[2];
9315  if (temp_sz > 3){
9316  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9317  }
9318  }
9319  }
9320 
9321  cl_event gpuExec;
9322 
9323  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9324 
9325  clWaitForEvents(1, &gpuExec);
9326 
9327  char *result = (char *) malloc(typesz);
9328  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9329 
9330  v->assign(result, result+sz);
9331 
9332  if (typesz2 != typesz or sz != sz2){
9333  float *result2;
9334  result2 = (float *) malloc(typesz2);
9335  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9336 
9337  v2->assign(result2, result2+sz2);
9338  free(result2);
9339  }
9340  else{
9341  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9342 
9343  v2->assign(result, result+sz2);
9344  }
9345 
9346  if (typesz3 != typesz or sz != sz3){
9347  float *result3;
9348  result3 = (float *) malloc(typesz3);
9349  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
9350 
9351  v3->assign(result3, result3+sz3);
9352  free(result3);
9353  }
9354  else{
9355  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9356 
9357  v3->assign(result, result+sz3);
9358  }
9359 
9360  clReleaseCommandQueue (queue);
9361  clReleaseMemObject(buffer);
9362  clReleaseMemObject(buffer2);
9363  clReleaseMemObject(buffer3);
9364  clReleaseEvent(gpuExec);
9365  free(result);
9366  }
9367 
9368 
9369  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
9370  size_t sz = v.size();
9371  size_t sz2 = v2.size();
9372  size_t sz3 = v3.size();
9373  size_t typesz = sizeof(char) * sz;
9374  size_t typesz2 = sizeof(float) * sz2;
9375  size_t typesz3 = sizeof(double) * sz3;
9376  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9377  if (temp_sz > 0){
9378  if (temp_sz > 2){
9379  if (temp_sz > 3){
9380  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9381  }
9382  typesz = sizeof(char) * params->buffers_size[0];
9383  typesz2 = sizeof(float) * params->buffers_size[1];
9384  typesz3 = sizeof(double) * params->buffers_size[2];
9385  }
9386  else{
9387  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9388  }
9389  }
9390  cl_int error = 0;
9391  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9392  checkError(error);
9393  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9394  checkError(error);
9395  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9396  checkError(error);
9397  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9398  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9399  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9400  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9401 
9402  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
9403  checkError (error);
9404  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9405  checkError (error);
9406  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9407  checkError (error);
9408 
9409  size_t size[3] = {sz, sz2, sz3};
9410  size_t work_dimension = 3;
9411 
9412  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9413  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9414  work_dimension = 1;
9415  }
9416  else if(temp_sz > 0){
9417  if (params->multi_dimensional){
9418  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9419  For default multidimensional global work size, leave the global_work_size vector empty, \
9420  and set multi_dimensional to true. Setting the global work size based on the values inside \
9421  the global_work_size vector.");
9422  }
9423  if (temp_sz == 1){
9424  size[0] = params->global_work_size[0];
9425  work_dimension = 1;
9426  }
9427  else if (temp_sz == 2){
9428  size[0] = params->global_work_size[0];
9429  size[1] = params->global_work_size[1];
9430  work_dimension = 2;
9431  }
9432  else{
9433  size[0] = params->global_work_size[0];
9434  size[1] = params->global_work_size[1];
9435  size[2] = params->global_work_size[2];
9436  if (temp_sz > 3){
9437  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9438  }
9439  }
9440  }
9441 
9442  cl_event gpuExec;
9443 
9444  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9445 
9446  clWaitForEvents(1, &gpuExec);
9447 
9448  char *result = (char *) malloc(typesz);
9449  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9450 
9451  std::vector<char> res = std::vector<char>();
9452  res.assign(result, result+sz);
9453 
9454  clReleaseCommandQueue (queue);
9455  clReleaseMemObject(buffer);
9456  clReleaseMemObject(buffer2);
9457  clReleaseMemObject(buffer3);
9458  clReleaseEvent(gpuExec);
9459  free(result);
9460 
9461  return res;
9462  }
9463 
9464  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
9465  size_t sz = v->size();
9466  size_t sz2 = v2.size();
9467  size_t sz3 = v3.size();
9468  size_t typesz = sizeof(char) * sz;
9469  size_t typesz2 = sizeof(float) * sz2;
9470  size_t typesz3 = sizeof(double) * sz3;
9471  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9472  if (temp_sz > 0){
9473  if (temp_sz > 2){
9474  if (temp_sz > 3){
9475  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9476  }
9477  typesz = sizeof(char) * params->buffers_size[0];
9478  typesz2 = sizeof(float) * params->buffers_size[1];
9479  typesz3 = sizeof(double) * params->buffers_size[2];
9480  }
9481  else{
9482  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9483  }
9484  }
9485  cl_int error = 0;
9486  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9487  checkError(error);
9488  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9489  checkError(error);
9490  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9491  checkError(error);
9492  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9493  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9494  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9495  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9496 
9497  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9498  checkError (error);
9499  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9500  checkError (error);
9501  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9502  checkError (error);
9503 
9504  size_t size[3] = {sz, sz2, sz3};
9505  size_t work_dimension = 3;
9506 
9507  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9508  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9509  work_dimension = 1;
9510  }
9511  else if(temp_sz > 0){
9512  if (params->multi_dimensional){
9513  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9514  For default multidimensional global work size, leave the global_work_size vector empty, \
9515  and set multi_dimensional to true. Setting the global work size based on the values inside \
9516  the global_work_size vector.");
9517  }
9518  if (temp_sz == 1){
9519  size[0] = params->global_work_size[0];
9520  work_dimension = 1;
9521  }
9522  else if (temp_sz == 2){
9523  size[0] = params->global_work_size[0];
9524  size[1] = params->global_work_size[1];
9525  work_dimension = 2;
9526  }
9527  else{
9528  size[0] = params->global_work_size[0];
9529  size[1] = params->global_work_size[1];
9530  size[2] = params->global_work_size[2];
9531  if (temp_sz > 3){
9532  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9533  }
9534  }
9535  }
9536 
9537  cl_event gpuExec;
9538 
9539  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9540 
9541  clWaitForEvents(1, &gpuExec);
9542 
9543  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9544 
9545  clWaitForEvents(1, &gpuExec);
9546 
9547  char *result = (char *) malloc(typesz);
9548  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9549 
9550  v->assign(result, result+sz);
9551 
9552  clReleaseCommandQueue (queue);
9553  clReleaseMemObject(buffer);
9554  clReleaseMemObject(buffer2);
9555  clReleaseMemObject(buffer3);
9556  clReleaseEvent(gpuExec);
9557  free(result);
9558  }
9559 
9560  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
9561  size_t sz = v->size();
9562  size_t sz2 = v2->size();
9563  size_t sz3 = v3.size();
9564  size_t typesz = sizeof(char) * sz;
9565  size_t typesz2 = sizeof(float) * sz2;
9566  size_t typesz3 = sizeof(double) * sz3;
9567  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9568  if (temp_sz > 0){
9569  if (temp_sz > 2){
9570  if (temp_sz > 3){
9571  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9572  }
9573  typesz = sizeof(char) * params->buffers_size[0];
9574  typesz2 = sizeof(float) * params->buffers_size[1];
9575  typesz3 = sizeof(double) * params->buffers_size[2];
9576  }
9577  else{
9578  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9579  }
9580  }
9581  cl_int error = 0;
9582  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9583  checkError(error);
9584  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9585  checkError(error);
9586  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9587  checkError(error);
9588  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9589  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9590  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9591  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9592 
9593  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9594  checkError (error);
9595  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9596  checkError (error);
9597  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9598  checkError (error);
9599 
9600  size_t size[3] = {sz, sz2, sz3};
9601  size_t work_dimension = 3;
9602 
9603  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9604  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9605  work_dimension = 1;
9606  }
9607  else if(temp_sz > 0){
9608  if (params->multi_dimensional){
9609  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9610  For default multidimensional global work size, leave the global_work_size vector empty, \
9611  and set multi_dimensional to true. Setting the global work size based on the values inside \
9612  the global_work_size vector.");
9613  }
9614  if (temp_sz == 1){
9615  size[0] = params->global_work_size[0];
9616  work_dimension = 1;
9617  }
9618  else if (temp_sz == 2){
9619  size[0] = params->global_work_size[0];
9620  size[1] = params->global_work_size[1];
9621  work_dimension = 2;
9622  }
9623  else{
9624  size[0] = params->global_work_size[0];
9625  size[1] = params->global_work_size[1];
9626  size[2] = params->global_work_size[2];
9627  if (temp_sz > 3){
9628  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9629  }
9630  }
9631  }
9632 
9633  cl_event gpuExec;
9634 
9635  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9636 
9637  clWaitForEvents(1, &gpuExec);
9638 
9639  char *result = (char *) malloc(typesz);
9640  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9641 
9642  v->assign(result, result+sz);
9643 
9644  if (typesz2 != typesz or sz != sz2){
9645  float *result2;
9646  result2 = (float *) malloc(typesz2);
9647  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9648 
9649  v2->assign(result2, result2+sz2);
9650  free(result2);
9651  }
9652  else{
9653  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9654 
9655  v2->assign(result, result+sz2);
9656  }
9657 
9658  clReleaseCommandQueue (queue);
9659  clReleaseMemObject(buffer);
9660  clReleaseMemObject(buffer2);
9661  clReleaseMemObject(buffer3);
9662  clReleaseEvent(gpuExec);
9663  free(result);
9664  }
9665 
9666  void ROS_OpenCL::process(std::vector<char>* v, std::vector<float>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
9667  size_t sz = v->size();
9668  size_t sz2 = v2->size();
9669  size_t sz3 = v3->size();
9670  size_t typesz = sizeof(char) * sz;
9671  size_t typesz2 = sizeof(float) * sz2;
9672  size_t typesz3 = sizeof(double) * sz3;
9673  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9674  if (temp_sz > 0){
9675  if (temp_sz > 2){
9676  if (temp_sz > 3){
9677  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9678  }
9679  typesz = sizeof(char) * params->buffers_size[0];
9680  typesz2 = sizeof(float) * params->buffers_size[1];
9681  typesz3 = sizeof(double) * params->buffers_size[2];
9682  }
9683  else{
9684  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9685  }
9686  }
9687  cl_int error = 0;
9688  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9689  checkError(error);
9690  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9691  checkError(error);
9692  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
9693  checkError(error);
9694  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9695  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9696  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9697  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9698 
9699  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9700  checkError (error);
9701  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9702  checkError (error);
9703  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
9704  checkError (error);
9705 
9706  size_t size[3] = {sz, sz2, sz3};
9707  size_t work_dimension = 3;
9708 
9709  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9710  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9711  work_dimension = 1;
9712  }
9713  else if(temp_sz > 0){
9714  if (params->multi_dimensional){
9715  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9716  For default multidimensional global work size, leave the global_work_size vector empty, \
9717  and set multi_dimensional to true. Setting the global work size based on the values inside \
9718  the global_work_size vector.");
9719  }
9720  if (temp_sz == 1){
9721  size[0] = params->global_work_size[0];
9722  work_dimension = 1;
9723  }
9724  else if (temp_sz == 2){
9725  size[0] = params->global_work_size[0];
9726  size[1] = params->global_work_size[1];
9727  work_dimension = 2;
9728  }
9729  else{
9730  size[0] = params->global_work_size[0];
9731  size[1] = params->global_work_size[1];
9732  size[2] = params->global_work_size[2];
9733  if (temp_sz > 3){
9734  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9735  }
9736  }
9737  }
9738 
9739  cl_event gpuExec;
9740 
9741  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9742 
9743  clWaitForEvents(1, &gpuExec);
9744 
9745  char *result = (char *) malloc(typesz);
9746  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9747 
9748  v->assign(result, result+sz);
9749 
9750  if (typesz2 != typesz or sz != sz2){
9751  float *result2;
9752  result2 = (float *) malloc(typesz2);
9753  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9754 
9755  v2->assign(result2, result2+sz2);
9756  free(result2);
9757  }
9758  else{
9759  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9760 
9761  v2->assign(result, result+sz2);
9762  }
9763 
9764  if (typesz3 != typesz or sz != sz3){
9765  double *result3;
9766  result3 = (double *) malloc(typesz3);
9767  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
9768 
9769  v3->assign(result3, result3+sz3);
9770  free(result3);
9771  }
9772  else{
9773  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9774 
9775  v3->assign(result, result+sz3);
9776  }
9777 
9778  clReleaseCommandQueue (queue);
9779  clReleaseMemObject(buffer);
9780  clReleaseMemObject(buffer2);
9781  clReleaseMemObject(buffer3);
9782  clReleaseEvent(gpuExec);
9783  free(result);
9784  }
9785 
9786 
9787  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
9788  size_t sz = v.size();
9789  size_t sz2 = v2.size();
9790  size_t sz3 = v3.size();
9791  size_t typesz = sizeof(char) * sz;
9792  size_t typesz2 = sizeof(double) * sz2;
9793  size_t typesz3 = sizeof(char) * sz3;
9794  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9795  if (temp_sz > 0){
9796  if (temp_sz > 2){
9797  if (temp_sz > 3){
9798  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9799  }
9800  typesz = sizeof(char) * params->buffers_size[0];
9801  typesz2 = sizeof(double) * params->buffers_size[1];
9802  typesz3 = sizeof(char) * params->buffers_size[2];
9803  }
9804  else{
9805  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9806  }
9807  }
9808  cl_int error = 0;
9809  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9810  checkError(error);
9811  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9812  checkError(error);
9813  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9814  checkError(error);
9815  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9816  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9817  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9818  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9819 
9820  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
9821  checkError (error);
9822  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9823  checkError (error);
9824  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9825  checkError (error);
9826 
9827  size_t size[3] = {sz, sz2, sz3};
9828  size_t work_dimension = 3;
9829 
9830  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9831  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9832  work_dimension = 1;
9833  }
9834  else if(temp_sz > 0){
9835  if (params->multi_dimensional){
9836  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9837  For default multidimensional global work size, leave the global_work_size vector empty, \
9838  and set multi_dimensional to true. Setting the global work size based on the values inside \
9839  the global_work_size vector.");
9840  }
9841  if (temp_sz == 1){
9842  size[0] = params->global_work_size[0];
9843  work_dimension = 1;
9844  }
9845  else if (temp_sz == 2){
9846  size[0] = params->global_work_size[0];
9847  size[1] = params->global_work_size[1];
9848  work_dimension = 2;
9849  }
9850  else{
9851  size[0] = params->global_work_size[0];
9852  size[1] = params->global_work_size[1];
9853  size[2] = params->global_work_size[2];
9854  if (temp_sz > 3){
9855  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9856  }
9857  }
9858  }
9859 
9860  cl_event gpuExec;
9861 
9862  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9863 
9864  clWaitForEvents(1, &gpuExec);
9865 
9866  char *result = (char *) malloc(typesz);
9867  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9868 
9869  std::vector<char> res = std::vector<char>();
9870  res.assign(result, result+sz);
9871 
9872  clReleaseCommandQueue (queue);
9873  clReleaseMemObject(buffer);
9874  clReleaseMemObject(buffer2);
9875  clReleaseMemObject(buffer3);
9876  clReleaseEvent(gpuExec);
9877  free(result);
9878 
9879  return res;
9880  }
9881 
9882  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
9883  size_t sz = v->size();
9884  size_t sz2 = v2.size();
9885  size_t sz3 = v3.size();
9886  size_t typesz = sizeof(char) * sz;
9887  size_t typesz2 = sizeof(double) * sz2;
9888  size_t typesz3 = sizeof(char) * sz3;
9889  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9890  if (temp_sz > 0){
9891  if (temp_sz > 2){
9892  if (temp_sz > 3){
9893  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9894  }
9895  typesz = sizeof(char) * params->buffers_size[0];
9896  typesz2 = sizeof(double) * params->buffers_size[1];
9897  typesz3 = sizeof(char) * params->buffers_size[2];
9898  }
9899  else{
9900  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9901  }
9902  }
9903  cl_int error = 0;
9904  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9905  checkError(error);
9906  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9907  checkError(error);
9908  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
9909  checkError(error);
9910  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
9911  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
9912  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
9913  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
9914 
9915  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9916  checkError (error);
9917  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9918  checkError (error);
9919  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
9920  checkError (error);
9921 
9922  size_t size[3] = {sz, sz2, sz3};
9923  size_t work_dimension = 3;
9924 
9925  temp_sz = params != NULL ? params->global_work_size.size() : 0;
9926  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
9927  work_dimension = 1;
9928  }
9929  else if(temp_sz > 0){
9930  if (params->multi_dimensional){
9931  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
9932  For default multidimensional global work size, leave the global_work_size vector empty, \
9933  and set multi_dimensional to true. Setting the global work size based on the values inside \
9934  the global_work_size vector.");
9935  }
9936  if (temp_sz == 1){
9937  size[0] = params->global_work_size[0];
9938  work_dimension = 1;
9939  }
9940  else if (temp_sz == 2){
9941  size[0] = params->global_work_size[0];
9942  size[1] = params->global_work_size[1];
9943  work_dimension = 2;
9944  }
9945  else{
9946  size[0] = params->global_work_size[0];
9947  size[1] = params->global_work_size[1];
9948  size[2] = params->global_work_size[2];
9949  if (temp_sz > 3){
9950  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
9951  }
9952  }
9953  }
9954 
9955  cl_event gpuExec;
9956 
9957  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9958 
9959  clWaitForEvents(1, &gpuExec);
9960 
9961  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9962 
9963  clWaitForEvents(1, &gpuExec);
9964 
9965  char *result = (char *) malloc(typesz);
9966  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9967 
9968  v->assign(result, result+sz);
9969 
9970  clReleaseCommandQueue (queue);
9971  clReleaseMemObject(buffer);
9972  clReleaseMemObject(buffer2);
9973  clReleaseMemObject(buffer3);
9974  clReleaseEvent(gpuExec);
9975  free(result);
9976  }
9977 
9978  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
9979  size_t sz = v->size();
9980  size_t sz2 = v2->size();
9981  size_t sz3 = v3.size();
9982  size_t typesz = sizeof(char) * sz;
9983  size_t typesz2 = sizeof(double) * sz2;
9984  size_t typesz3 = sizeof(char) * sz3;
9985  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
9986  if (temp_sz > 0){
9987  if (temp_sz > 2){
9988  if (temp_sz > 3){
9989  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9990  }
9991  typesz = sizeof(char) * params->buffers_size[0];
9992  typesz2 = sizeof(double) * params->buffers_size[1];
9993  typesz3 = sizeof(char) * params->buffers_size[2];
9994  }
9995  else{
9996  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
9997  }
9998  }
9999  cl_int error = 0;
10000  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10001  checkError(error);
10002  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10003  checkError(error);
10004  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10005  checkError(error);
10006  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10007  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10008  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10009  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10010 
10011  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10012  checkError (error);
10013  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10014  checkError (error);
10015  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10016  checkError (error);
10017 
10018  size_t size[3] = {sz, sz2, sz3};
10019  size_t work_dimension = 3;
10020 
10021  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10022  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10023  work_dimension = 1;
10024  }
10025  else if(temp_sz > 0){
10026  if (params->multi_dimensional){
10027  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10028  For default multidimensional global work size, leave the global_work_size vector empty, \
10029  and set multi_dimensional to true. Setting the global work size based on the values inside \
10030  the global_work_size vector.");
10031  }
10032  if (temp_sz == 1){
10033  size[0] = params->global_work_size[0];
10034  work_dimension = 1;
10035  }
10036  else if (temp_sz == 2){
10037  size[0] = params->global_work_size[0];
10038  size[1] = params->global_work_size[1];
10039  work_dimension = 2;
10040  }
10041  else{
10042  size[0] = params->global_work_size[0];
10043  size[1] = params->global_work_size[1];
10044  size[2] = params->global_work_size[2];
10045  if (temp_sz > 3){
10046  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10047  }
10048  }
10049  }
10050 
10051  cl_event gpuExec;
10052 
10053  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10054 
10055  clWaitForEvents(1, &gpuExec);
10056 
10057  char *result = (char *) malloc(typesz);
10058  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10059 
10060  v->assign(result, result+sz);
10061 
10062  if (typesz2 != typesz or sz != sz2){
10063  double *result2;
10064  result2 = (double *) malloc(typesz2);
10065  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10066 
10067  v2->assign(result2, result2+sz2);
10068  free(result2);
10069  }
10070  else{
10071  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10072 
10073  v2->assign(result, result+sz2);
10074  }
10075 
10076  clReleaseCommandQueue (queue);
10077  clReleaseMemObject(buffer);
10078  clReleaseMemObject(buffer2);
10079  clReleaseMemObject(buffer3);
10080  clReleaseEvent(gpuExec);
10081  free(result);
10082  }
10083 
10084  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
10085  size_t sz = v->size();
10086  size_t sz2 = v2->size();
10087  size_t sz3 = v3->size();
10088  size_t typesz = sizeof(char) * sz;
10089  size_t typesz2 = sizeof(double) * sz2;
10090  size_t typesz3 = sizeof(char) * sz3;
10091  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10092  if (temp_sz > 0){
10093  if (temp_sz > 2){
10094  if (temp_sz > 3){
10095  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10096  }
10097  typesz = sizeof(char) * params->buffers_size[0];
10098  typesz2 = sizeof(double) * params->buffers_size[1];
10099  typesz3 = sizeof(char) * params->buffers_size[2];
10100  }
10101  else{
10102  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10103  }
10104  }
10105  cl_int error = 0;
10106  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10107  checkError(error);
10108  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10109  checkError(error);
10110  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
10111  checkError(error);
10112  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10113  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10114  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10115  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10116 
10117  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10118  checkError (error);
10119  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10120  checkError (error);
10121  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
10122  checkError (error);
10123 
10124  size_t size[3] = {sz, sz2, sz3};
10125  size_t work_dimension = 3;
10126 
10127  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10128  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10129  work_dimension = 1;
10130  }
10131  else if(temp_sz > 0){
10132  if (params->multi_dimensional){
10133  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10134  For default multidimensional global work size, leave the global_work_size vector empty, \
10135  and set multi_dimensional to true. Setting the global work size based on the values inside \
10136  the global_work_size vector.");
10137  }
10138  if (temp_sz == 1){
10139  size[0] = params->global_work_size[0];
10140  work_dimension = 1;
10141  }
10142  else if (temp_sz == 2){
10143  size[0] = params->global_work_size[0];
10144  size[1] = params->global_work_size[1];
10145  work_dimension = 2;
10146  }
10147  else{
10148  size[0] = params->global_work_size[0];
10149  size[1] = params->global_work_size[1];
10150  size[2] = params->global_work_size[2];
10151  if (temp_sz > 3){
10152  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10153  }
10154  }
10155  }
10156 
10157  cl_event gpuExec;
10158 
10159  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10160 
10161  clWaitForEvents(1, &gpuExec);
10162 
10163  char *result = (char *) malloc(typesz);
10164  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10165 
10166  v->assign(result, result+sz);
10167 
10168  if (typesz2 != typesz or sz != sz2){
10169  double *result2;
10170  result2 = (double *) malloc(typesz2);
10171  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10172 
10173  v2->assign(result2, result2+sz2);
10174  free(result2);
10175  }
10176  else{
10177  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10178 
10179  v2->assign(result, result+sz2);
10180  }
10181 
10182  if (typesz3 != typesz or sz != sz3){
10183  char *result3;
10184  result3 = (char *) malloc(typesz3);
10185  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
10186 
10187  v3->assign(result3, result3+sz3);
10188  free(result3);
10189  }
10190  else{
10191  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10192 
10193  v3->assign(result, result+sz3);
10194  }
10195 
10196  clReleaseCommandQueue (queue);
10197  clReleaseMemObject(buffer);
10198  clReleaseMemObject(buffer2);
10199  clReleaseMemObject(buffer3);
10200  clReleaseEvent(gpuExec);
10201  free(result);
10202  }
10203 
10204 
10205  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
10206  size_t sz = v.size();
10207  size_t sz2 = v2.size();
10208  size_t sz3 = v3.size();
10209  size_t typesz = sizeof(char) * sz;
10210  size_t typesz2 = sizeof(double) * sz2;
10211  size_t typesz3 = sizeof(int) * sz3;
10212  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10213  if (temp_sz > 0){
10214  if (temp_sz > 2){
10215  if (temp_sz > 3){
10216  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10217  }
10218  typesz = sizeof(char) * params->buffers_size[0];
10219  typesz2 = sizeof(double) * params->buffers_size[1];
10220  typesz3 = sizeof(int) * params->buffers_size[2];
10221  }
10222  else{
10223  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10224  }
10225  }
10226  cl_int error = 0;
10227  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10228  checkError(error);
10229  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10230  checkError(error);
10231  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10232  checkError(error);
10233  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10234  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10235  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10236  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10237 
10238  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
10239  checkError (error);
10240  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10241  checkError (error);
10242  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10243  checkError (error);
10244 
10245  size_t size[3] = {sz, sz2, sz3};
10246  size_t work_dimension = 3;
10247 
10248  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10249  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10250  work_dimension = 1;
10251  }
10252  else if(temp_sz > 0){
10253  if (params->multi_dimensional){
10254  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10255  For default multidimensional global work size, leave the global_work_size vector empty, \
10256  and set multi_dimensional to true. Setting the global work size based on the values inside \
10257  the global_work_size vector.");
10258  }
10259  if (temp_sz == 1){
10260  size[0] = params->global_work_size[0];
10261  work_dimension = 1;
10262  }
10263  else if (temp_sz == 2){
10264  size[0] = params->global_work_size[0];
10265  size[1] = params->global_work_size[1];
10266  work_dimension = 2;
10267  }
10268  else{
10269  size[0] = params->global_work_size[0];
10270  size[1] = params->global_work_size[1];
10271  size[2] = params->global_work_size[2];
10272  if (temp_sz > 3){
10273  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10274  }
10275  }
10276  }
10277 
10278  cl_event gpuExec;
10279 
10280  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10281 
10282  clWaitForEvents(1, &gpuExec);
10283 
10284  char *result = (char *) malloc(typesz);
10285  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10286 
10287  std::vector<char> res = std::vector<char>();
10288  res.assign(result, result+sz);
10289 
10290  clReleaseCommandQueue (queue);
10291  clReleaseMemObject(buffer);
10292  clReleaseMemObject(buffer2);
10293  clReleaseMemObject(buffer3);
10294  clReleaseEvent(gpuExec);
10295  free(result);
10296 
10297  return res;
10298  }
10299 
10300  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
10301  size_t sz = v->size();
10302  size_t sz2 = v2.size();
10303  size_t sz3 = v3.size();
10304  size_t typesz = sizeof(char) * sz;
10305  size_t typesz2 = sizeof(double) * sz2;
10306  size_t typesz3 = sizeof(int) * sz3;
10307  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10308  if (temp_sz > 0){
10309  if (temp_sz > 2){
10310  if (temp_sz > 3){
10311  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10312  }
10313  typesz = sizeof(char) * params->buffers_size[0];
10314  typesz2 = sizeof(double) * params->buffers_size[1];
10315  typesz3 = sizeof(int) * params->buffers_size[2];
10316  }
10317  else{
10318  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10319  }
10320  }
10321  cl_int error = 0;
10322  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10323  checkError(error);
10324  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10325  checkError(error);
10326  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10327  checkError(error);
10328  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10329  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10330  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10331  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10332 
10333  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10334  checkError (error);
10335  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10336  checkError (error);
10337  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10338  checkError (error);
10339 
10340  size_t size[3] = {sz, sz2, sz3};
10341  size_t work_dimension = 3;
10342 
10343  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10344  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10345  work_dimension = 1;
10346  }
10347  else if(temp_sz > 0){
10348  if (params->multi_dimensional){
10349  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10350  For default multidimensional global work size, leave the global_work_size vector empty, \
10351  and set multi_dimensional to true. Setting the global work size based on the values inside \
10352  the global_work_size vector.");
10353  }
10354  if (temp_sz == 1){
10355  size[0] = params->global_work_size[0];
10356  work_dimension = 1;
10357  }
10358  else if (temp_sz == 2){
10359  size[0] = params->global_work_size[0];
10360  size[1] = params->global_work_size[1];
10361  work_dimension = 2;
10362  }
10363  else{
10364  size[0] = params->global_work_size[0];
10365  size[1] = params->global_work_size[1];
10366  size[2] = params->global_work_size[2];
10367  if (temp_sz > 3){
10368  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10369  }
10370  }
10371  }
10372 
10373  cl_event gpuExec;
10374 
10375  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10376 
10377  clWaitForEvents(1, &gpuExec);
10378 
10379  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10380 
10381  clWaitForEvents(1, &gpuExec);
10382 
10383  char *result = (char *) malloc(typesz);
10384  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10385 
10386  v->assign(result, result+sz);
10387 
10388  clReleaseCommandQueue (queue);
10389  clReleaseMemObject(buffer);
10390  clReleaseMemObject(buffer2);
10391  clReleaseMemObject(buffer3);
10392  clReleaseEvent(gpuExec);
10393  free(result);
10394  }
10395 
10396  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
10397  size_t sz = v->size();
10398  size_t sz2 = v2->size();
10399  size_t sz3 = v3.size();
10400  size_t typesz = sizeof(char) * sz;
10401  size_t typesz2 = sizeof(double) * sz2;
10402  size_t typesz3 = sizeof(int) * sz3;
10403  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10404  if (temp_sz > 0){
10405  if (temp_sz > 2){
10406  if (temp_sz > 3){
10407  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10408  }
10409  typesz = sizeof(char) * params->buffers_size[0];
10410  typesz2 = sizeof(double) * params->buffers_size[1];
10411  typesz3 = sizeof(int) * params->buffers_size[2];
10412  }
10413  else{
10414  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10415  }
10416  }
10417  cl_int error = 0;
10418  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10419  checkError(error);
10420  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10421  checkError(error);
10422  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10423  checkError(error);
10424  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10425  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10426  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10427  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10428 
10429  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10430  checkError (error);
10431  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10432  checkError (error);
10433  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10434  checkError (error);
10435 
10436  size_t size[3] = {sz, sz2, sz3};
10437  size_t work_dimension = 3;
10438 
10439  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10440  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10441  work_dimension = 1;
10442  }
10443  else if(temp_sz > 0){
10444  if (params->multi_dimensional){
10445  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10446  For default multidimensional global work size, leave the global_work_size vector empty, \
10447  and set multi_dimensional to true. Setting the global work size based on the values inside \
10448  the global_work_size vector.");
10449  }
10450  if (temp_sz == 1){
10451  size[0] = params->global_work_size[0];
10452  work_dimension = 1;
10453  }
10454  else if (temp_sz == 2){
10455  size[0] = params->global_work_size[0];
10456  size[1] = params->global_work_size[1];
10457  work_dimension = 2;
10458  }
10459  else{
10460  size[0] = params->global_work_size[0];
10461  size[1] = params->global_work_size[1];
10462  size[2] = params->global_work_size[2];
10463  if (temp_sz > 3){
10464  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10465  }
10466  }
10467  }
10468 
10469  cl_event gpuExec;
10470 
10471  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10472 
10473  clWaitForEvents(1, &gpuExec);
10474 
10475  char *result = (char *) malloc(typesz);
10476  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10477 
10478  v->assign(result, result+sz);
10479 
10480  if (typesz2 != typesz or sz != sz2){
10481  double *result2;
10482  result2 = (double *) malloc(typesz2);
10483  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10484 
10485  v2->assign(result2, result2+sz2);
10486  free(result2);
10487  }
10488  else{
10489  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10490 
10491  v2->assign(result, result+sz2);
10492  }
10493 
10494  clReleaseCommandQueue (queue);
10495  clReleaseMemObject(buffer);
10496  clReleaseMemObject(buffer2);
10497  clReleaseMemObject(buffer3);
10498  clReleaseEvent(gpuExec);
10499  free(result);
10500  }
10501 
10502  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
10503  size_t sz = v->size();
10504  size_t sz2 = v2->size();
10505  size_t sz3 = v3->size();
10506  size_t typesz = sizeof(char) * sz;
10507  size_t typesz2 = sizeof(double) * sz2;
10508  size_t typesz3 = sizeof(int) * sz3;
10509  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10510  if (temp_sz > 0){
10511  if (temp_sz > 2){
10512  if (temp_sz > 3){
10513  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10514  }
10515  typesz = sizeof(char) * params->buffers_size[0];
10516  typesz2 = sizeof(double) * params->buffers_size[1];
10517  typesz3 = sizeof(int) * params->buffers_size[2];
10518  }
10519  else{
10520  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10521  }
10522  }
10523  cl_int error = 0;
10524  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10525  checkError(error);
10526  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10527  checkError(error);
10528  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
10529  checkError(error);
10530  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10531  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10532  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10533  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10534 
10535  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10536  checkError (error);
10537  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10538  checkError (error);
10539  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
10540  checkError (error);
10541 
10542  size_t size[3] = {sz, sz2, sz3};
10543  size_t work_dimension = 3;
10544 
10545  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10546  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10547  work_dimension = 1;
10548  }
10549  else if(temp_sz > 0){
10550  if (params->multi_dimensional){
10551  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10552  For default multidimensional global work size, leave the global_work_size vector empty, \
10553  and set multi_dimensional to true. Setting the global work size based on the values inside \
10554  the global_work_size vector.");
10555  }
10556  if (temp_sz == 1){
10557  size[0] = params->global_work_size[0];
10558  work_dimension = 1;
10559  }
10560  else if (temp_sz == 2){
10561  size[0] = params->global_work_size[0];
10562  size[1] = params->global_work_size[1];
10563  work_dimension = 2;
10564  }
10565  else{
10566  size[0] = params->global_work_size[0];
10567  size[1] = params->global_work_size[1];
10568  size[2] = params->global_work_size[2];
10569  if (temp_sz > 3){
10570  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10571  }
10572  }
10573  }
10574 
10575  cl_event gpuExec;
10576 
10577  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10578 
10579  clWaitForEvents(1, &gpuExec);
10580 
10581  char *result = (char *) malloc(typesz);
10582  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10583 
10584  v->assign(result, result+sz);
10585 
10586  if (typesz2 != typesz or sz != sz2){
10587  double *result2;
10588  result2 = (double *) malloc(typesz2);
10589  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10590 
10591  v2->assign(result2, result2+sz2);
10592  free(result2);
10593  }
10594  else{
10595  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10596 
10597  v2->assign(result, result+sz2);
10598  }
10599 
10600  if (typesz3 != typesz or sz != sz3){
10601  int *result3;
10602  result3 = (int *) malloc(typesz3);
10603  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
10604 
10605  v3->assign(result3, result3+sz3);
10606  free(result3);
10607  }
10608  else{
10609  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10610 
10611  v3->assign(result, result+sz3);
10612  }
10613 
10614  clReleaseCommandQueue (queue);
10615  clReleaseMemObject(buffer);
10616  clReleaseMemObject(buffer2);
10617  clReleaseMemObject(buffer3);
10618  clReleaseEvent(gpuExec);
10619  free(result);
10620  }
10621 
10622 
10623  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
10624  size_t sz = v.size();
10625  size_t sz2 = v2.size();
10626  size_t sz3 = v3.size();
10627  size_t typesz = sizeof(char) * sz;
10628  size_t typesz2 = sizeof(double) * sz2;
10629  size_t typesz3 = sizeof(float) * sz3;
10630  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10631  if (temp_sz > 0){
10632  if (temp_sz > 2){
10633  if (temp_sz > 3){
10634  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10635  }
10636  typesz = sizeof(char) * params->buffers_size[0];
10637  typesz2 = sizeof(double) * params->buffers_size[1];
10638  typesz3 = sizeof(float) * params->buffers_size[2];
10639  }
10640  else{
10641  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10642  }
10643  }
10644  cl_int error = 0;
10645  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10646  checkError(error);
10647  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10648  checkError(error);
10649  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10650  checkError(error);
10651  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10652  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10653  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10654  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10655 
10656  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
10657  checkError (error);
10658  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10659  checkError (error);
10660  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10661  checkError (error);
10662 
10663  size_t size[3] = {sz, sz2, sz3};
10664  size_t work_dimension = 3;
10665 
10666  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10667  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10668  work_dimension = 1;
10669  }
10670  else if(temp_sz > 0){
10671  if (params->multi_dimensional){
10672  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10673  For default multidimensional global work size, leave the global_work_size vector empty, \
10674  and set multi_dimensional to true. Setting the global work size based on the values inside \
10675  the global_work_size vector.");
10676  }
10677  if (temp_sz == 1){
10678  size[0] = params->global_work_size[0];
10679  work_dimension = 1;
10680  }
10681  else if (temp_sz == 2){
10682  size[0] = params->global_work_size[0];
10683  size[1] = params->global_work_size[1];
10684  work_dimension = 2;
10685  }
10686  else{
10687  size[0] = params->global_work_size[0];
10688  size[1] = params->global_work_size[1];
10689  size[2] = params->global_work_size[2];
10690  if (temp_sz > 3){
10691  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10692  }
10693  }
10694  }
10695 
10696  cl_event gpuExec;
10697 
10698  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10699 
10700  clWaitForEvents(1, &gpuExec);
10701 
10702  char *result = (char *) malloc(typesz);
10703  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10704 
10705  std::vector<char> res = std::vector<char>();
10706  res.assign(result, result+sz);
10707 
10708  clReleaseCommandQueue (queue);
10709  clReleaseMemObject(buffer);
10710  clReleaseMemObject(buffer2);
10711  clReleaseMemObject(buffer3);
10712  clReleaseEvent(gpuExec);
10713  free(result);
10714 
10715  return res;
10716  }
10717 
10718  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
10719  size_t sz = v->size();
10720  size_t sz2 = v2.size();
10721  size_t sz3 = v3.size();
10722  size_t typesz = sizeof(char) * sz;
10723  size_t typesz2 = sizeof(double) * sz2;
10724  size_t typesz3 = sizeof(float) * sz3;
10725  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10726  if (temp_sz > 0){
10727  if (temp_sz > 2){
10728  if (temp_sz > 3){
10729  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10730  }
10731  typesz = sizeof(char) * params->buffers_size[0];
10732  typesz2 = sizeof(double) * params->buffers_size[1];
10733  typesz3 = sizeof(float) * params->buffers_size[2];
10734  }
10735  else{
10736  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10737  }
10738  }
10739  cl_int error = 0;
10740  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10741  checkError(error);
10742  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10743  checkError(error);
10744  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10745  checkError(error);
10746  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10747  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10748  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10749  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10750 
10751  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10752  checkError (error);
10753  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10754  checkError (error);
10755  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10756  checkError (error);
10757 
10758  size_t size[3] = {sz, sz2, sz3};
10759  size_t work_dimension = 3;
10760 
10761  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10762  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10763  work_dimension = 1;
10764  }
10765  else if(temp_sz > 0){
10766  if (params->multi_dimensional){
10767  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10768  For default multidimensional global work size, leave the global_work_size vector empty, \
10769  and set multi_dimensional to true. Setting the global work size based on the values inside \
10770  the global_work_size vector.");
10771  }
10772  if (temp_sz == 1){
10773  size[0] = params->global_work_size[0];
10774  work_dimension = 1;
10775  }
10776  else if (temp_sz == 2){
10777  size[0] = params->global_work_size[0];
10778  size[1] = params->global_work_size[1];
10779  work_dimension = 2;
10780  }
10781  else{
10782  size[0] = params->global_work_size[0];
10783  size[1] = params->global_work_size[1];
10784  size[2] = params->global_work_size[2];
10785  if (temp_sz > 3){
10786  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10787  }
10788  }
10789  }
10790 
10791  cl_event gpuExec;
10792 
10793  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10794 
10795  clWaitForEvents(1, &gpuExec);
10796 
10797  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10798 
10799  clWaitForEvents(1, &gpuExec);
10800 
10801  char *result = (char *) malloc(typesz);
10802  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10803 
10804  v->assign(result, result+sz);
10805 
10806  clReleaseCommandQueue (queue);
10807  clReleaseMemObject(buffer);
10808  clReleaseMemObject(buffer2);
10809  clReleaseMemObject(buffer3);
10810  clReleaseEvent(gpuExec);
10811  free(result);
10812  }
10813 
10814  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
10815  size_t sz = v->size();
10816  size_t sz2 = v2->size();
10817  size_t sz3 = v3.size();
10818  size_t typesz = sizeof(char) * sz;
10819  size_t typesz2 = sizeof(double) * sz2;
10820  size_t typesz3 = sizeof(float) * sz3;
10821  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10822  if (temp_sz > 0){
10823  if (temp_sz > 2){
10824  if (temp_sz > 3){
10825  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10826  }
10827  typesz = sizeof(char) * params->buffers_size[0];
10828  typesz2 = sizeof(double) * params->buffers_size[1];
10829  typesz3 = sizeof(float) * params->buffers_size[2];
10830  }
10831  else{
10832  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10833  }
10834  }
10835  cl_int error = 0;
10836  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10837  checkError(error);
10838  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10839  checkError(error);
10840  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10841  checkError(error);
10842  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10843  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10844  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10845  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10846 
10847  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10848  checkError (error);
10849  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10850  checkError (error);
10851  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10852  checkError (error);
10853 
10854  size_t size[3] = {sz, sz2, sz3};
10855  size_t work_dimension = 3;
10856 
10857  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10858  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10859  work_dimension = 1;
10860  }
10861  else if(temp_sz > 0){
10862  if (params->multi_dimensional){
10863  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10864  For default multidimensional global work size, leave the global_work_size vector empty, \
10865  and set multi_dimensional to true. Setting the global work size based on the values inside \
10866  the global_work_size vector.");
10867  }
10868  if (temp_sz == 1){
10869  size[0] = params->global_work_size[0];
10870  work_dimension = 1;
10871  }
10872  else if (temp_sz == 2){
10873  size[0] = params->global_work_size[0];
10874  size[1] = params->global_work_size[1];
10875  work_dimension = 2;
10876  }
10877  else{
10878  size[0] = params->global_work_size[0];
10879  size[1] = params->global_work_size[1];
10880  size[2] = params->global_work_size[2];
10881  if (temp_sz > 3){
10882  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10883  }
10884  }
10885  }
10886 
10887  cl_event gpuExec;
10888 
10889  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10890 
10891  clWaitForEvents(1, &gpuExec);
10892 
10893  char *result = (char *) malloc(typesz);
10894  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10895 
10896  v->assign(result, result+sz);
10897 
10898  if (typesz2 != typesz or sz != sz2){
10899  double *result2;
10900  result2 = (double *) malloc(typesz2);
10901  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10902 
10903  v2->assign(result2, result2+sz2);
10904  free(result2);
10905  }
10906  else{
10907  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10908 
10909  v2->assign(result, result+sz2);
10910  }
10911 
10912  clReleaseCommandQueue (queue);
10913  clReleaseMemObject(buffer);
10914  clReleaseMemObject(buffer2);
10915  clReleaseMemObject(buffer3);
10916  clReleaseEvent(gpuExec);
10917  free(result);
10918  }
10919 
10920  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
10921  size_t sz = v->size();
10922  size_t sz2 = v2->size();
10923  size_t sz3 = v3->size();
10924  size_t typesz = sizeof(char) * sz;
10925  size_t typesz2 = sizeof(double) * sz2;
10926  size_t typesz3 = sizeof(float) * sz3;
10927  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
10928  if (temp_sz > 0){
10929  if (temp_sz > 2){
10930  if (temp_sz > 3){
10931  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10932  }
10933  typesz = sizeof(char) * params->buffers_size[0];
10934  typesz2 = sizeof(double) * params->buffers_size[1];
10935  typesz3 = sizeof(float) * params->buffers_size[2];
10936  }
10937  else{
10938  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10939  }
10940  }
10941  cl_int error = 0;
10942  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10943  checkError(error);
10944  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10945  checkError(error);
10946  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
10947  checkError(error);
10948  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
10949  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
10950  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
10951  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
10952 
10953  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10954  checkError (error);
10955  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10956  checkError (error);
10957  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
10958  checkError (error);
10959 
10960  size_t size[3] = {sz, sz2, sz3};
10961  size_t work_dimension = 3;
10962 
10963  temp_sz = params != NULL ? params->global_work_size.size() : 0;
10964  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
10965  work_dimension = 1;
10966  }
10967  else if(temp_sz > 0){
10968  if (params->multi_dimensional){
10969  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
10970  For default multidimensional global work size, leave the global_work_size vector empty, \
10971  and set multi_dimensional to true. Setting the global work size based on the values inside \
10972  the global_work_size vector.");
10973  }
10974  if (temp_sz == 1){
10975  size[0] = params->global_work_size[0];
10976  work_dimension = 1;
10977  }
10978  else if (temp_sz == 2){
10979  size[0] = params->global_work_size[0];
10980  size[1] = params->global_work_size[1];
10981  work_dimension = 2;
10982  }
10983  else{
10984  size[0] = params->global_work_size[0];
10985  size[1] = params->global_work_size[1];
10986  size[2] = params->global_work_size[2];
10987  if (temp_sz > 3){
10988  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10989  }
10990  }
10991  }
10992 
10993  cl_event gpuExec;
10994 
10995  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10996 
10997  clWaitForEvents(1, &gpuExec);
10998 
10999  char *result = (char *) malloc(typesz);
11000  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11001 
11002  v->assign(result, result+sz);
11003 
11004  if (typesz2 != typesz or sz != sz2){
11005  double *result2;
11006  result2 = (double *) malloc(typesz2);
11007  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11008 
11009  v2->assign(result2, result2+sz2);
11010  free(result2);
11011  }
11012  else{
11013  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11014 
11015  v2->assign(result, result+sz2);
11016  }
11017 
11018  if (typesz3 != typesz or sz != sz3){
11019  float *result3;
11020  result3 = (float *) malloc(typesz3);
11021  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11022 
11023  v3->assign(result3, result3+sz3);
11024  free(result3);
11025  }
11026  else{
11027  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11028 
11029  v3->assign(result, result+sz3);
11030  }
11031 
11032  clReleaseCommandQueue (queue);
11033  clReleaseMemObject(buffer);
11034  clReleaseMemObject(buffer2);
11035  clReleaseMemObject(buffer3);
11036  clReleaseEvent(gpuExec);
11037  free(result);
11038  }
11039 
11040 
11041  std::vector<char> ROS_OpenCL::process(const std::vector<char> v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
11042  size_t sz = v.size();
11043  size_t sz2 = v2.size();
11044  size_t sz3 = v3.size();
11045  size_t typesz = sizeof(char) * sz;
11046  size_t typesz2 = sizeof(double) * sz2;
11047  size_t typesz3 = sizeof(double) * sz3;
11048  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11049  if (temp_sz > 0){
11050  if (temp_sz > 2){
11051  if (temp_sz > 3){
11052  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11053  }
11054  typesz = sizeof(char) * params->buffers_size[0];
11055  typesz2 = sizeof(double) * params->buffers_size[1];
11056  typesz3 = sizeof(double) * params->buffers_size[2];
11057  }
11058  else{
11059  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11060  }
11061  }
11062  cl_int error = 0;
11063  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11064  checkError(error);
11065  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11066  checkError(error);
11067  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11068  checkError(error);
11069  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11070  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11071  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11072  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11073 
11074  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11075  checkError (error);
11076  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11077  checkError (error);
11078  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11079  checkError (error);
11080 
11081  size_t size[3] = {sz, sz2, sz3};
11082  size_t work_dimension = 3;
11083 
11084  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11085  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11086  work_dimension = 1;
11087  }
11088  else if(temp_sz > 0){
11089  if (params->multi_dimensional){
11090  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11091  For default multidimensional global work size, leave the global_work_size vector empty, \
11092  and set multi_dimensional to true. Setting the global work size based on the values inside \
11093  the global_work_size vector.");
11094  }
11095  if (temp_sz == 1){
11096  size[0] = params->global_work_size[0];
11097  work_dimension = 1;
11098  }
11099  else if (temp_sz == 2){
11100  size[0] = params->global_work_size[0];
11101  size[1] = params->global_work_size[1];
11102  work_dimension = 2;
11103  }
11104  else{
11105  size[0] = params->global_work_size[0];
11106  size[1] = params->global_work_size[1];
11107  size[2] = params->global_work_size[2];
11108  if (temp_sz > 3){
11109  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11110  }
11111  }
11112  }
11113 
11114  cl_event gpuExec;
11115 
11116  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11117 
11118  clWaitForEvents(1, &gpuExec);
11119 
11120  char *result = (char *) malloc(typesz);
11121  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11122 
11123  std::vector<char> res = std::vector<char>();
11124  res.assign(result, result+sz);
11125 
11126  clReleaseCommandQueue (queue);
11127  clReleaseMemObject(buffer);
11128  clReleaseMemObject(buffer2);
11129  clReleaseMemObject(buffer3);
11130  clReleaseEvent(gpuExec);
11131  free(result);
11132 
11133  return res;
11134  }
11135 
11136  void ROS_OpenCL::process(std::vector<char>* v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
11137  size_t sz = v->size();
11138  size_t sz2 = v2.size();
11139  size_t sz3 = v3.size();
11140  size_t typesz = sizeof(char) * sz;
11141  size_t typesz2 = sizeof(double) * sz2;
11142  size_t typesz3 = sizeof(double) * sz3;
11143  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11144  if (temp_sz > 0){
11145  if (temp_sz > 2){
11146  if (temp_sz > 3){
11147  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11148  }
11149  typesz = sizeof(char) * params->buffers_size[0];
11150  typesz2 = sizeof(double) * params->buffers_size[1];
11151  typesz3 = sizeof(double) * params->buffers_size[2];
11152  }
11153  else{
11154  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11155  }
11156  }
11157  cl_int error = 0;
11158  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11159  checkError(error);
11160  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11161  checkError(error);
11162  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11163  checkError(error);
11164  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11165  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11166  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11167  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11168 
11169  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11170  checkError (error);
11171  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11172  checkError (error);
11173  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11174  checkError (error);
11175 
11176  size_t size[3] = {sz, sz2, sz3};
11177  size_t work_dimension = 3;
11178 
11179  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11180  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11181  work_dimension = 1;
11182  }
11183  else if(temp_sz > 0){
11184  if (params->multi_dimensional){
11185  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11186  For default multidimensional global work size, leave the global_work_size vector empty, \
11187  and set multi_dimensional to true. Setting the global work size based on the values inside \
11188  the global_work_size vector.");
11189  }
11190  if (temp_sz == 1){
11191  size[0] = params->global_work_size[0];
11192  work_dimension = 1;
11193  }
11194  else if (temp_sz == 2){
11195  size[0] = params->global_work_size[0];
11196  size[1] = params->global_work_size[1];
11197  work_dimension = 2;
11198  }
11199  else{
11200  size[0] = params->global_work_size[0];
11201  size[1] = params->global_work_size[1];
11202  size[2] = params->global_work_size[2];
11203  if (temp_sz > 3){
11204  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11205  }
11206  }
11207  }
11208 
11209  cl_event gpuExec;
11210 
11211  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11212 
11213  clWaitForEvents(1, &gpuExec);
11214 
11215  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11216 
11217  clWaitForEvents(1, &gpuExec);
11218 
11219  char *result = (char *) malloc(typesz);
11220  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11221 
11222  v->assign(result, result+sz);
11223 
11224  clReleaseCommandQueue (queue);
11225  clReleaseMemObject(buffer);
11226  clReleaseMemObject(buffer2);
11227  clReleaseMemObject(buffer3);
11228  clReleaseEvent(gpuExec);
11229  free(result);
11230  }
11231 
11232  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
11233  size_t sz = v->size();
11234  size_t sz2 = v2->size();
11235  size_t sz3 = v3.size();
11236  size_t typesz = sizeof(char) * sz;
11237  size_t typesz2 = sizeof(double) * sz2;
11238  size_t typesz3 = sizeof(double) * sz3;
11239  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11240  if (temp_sz > 0){
11241  if (temp_sz > 2){
11242  if (temp_sz > 3){
11243  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11244  }
11245  typesz = sizeof(char) * params->buffers_size[0];
11246  typesz2 = sizeof(double) * params->buffers_size[1];
11247  typesz3 = sizeof(double) * params->buffers_size[2];
11248  }
11249  else{
11250  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11251  }
11252  }
11253  cl_int error = 0;
11254  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11255  checkError(error);
11256  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11257  checkError(error);
11258  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11259  checkError(error);
11260  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11261  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11262  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11263  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11264 
11265  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11266  checkError (error);
11267  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11268  checkError (error);
11269  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11270  checkError (error);
11271 
11272  size_t size[3] = {sz, sz2, sz3};
11273  size_t work_dimension = 3;
11274 
11275  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11276  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11277  work_dimension = 1;
11278  }
11279  else if(temp_sz > 0){
11280  if (params->multi_dimensional){
11281  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11282  For default multidimensional global work size, leave the global_work_size vector empty, \
11283  and set multi_dimensional to true. Setting the global work size based on the values inside \
11284  the global_work_size vector.");
11285  }
11286  if (temp_sz == 1){
11287  size[0] = params->global_work_size[0];
11288  work_dimension = 1;
11289  }
11290  else if (temp_sz == 2){
11291  size[0] = params->global_work_size[0];
11292  size[1] = params->global_work_size[1];
11293  work_dimension = 2;
11294  }
11295  else{
11296  size[0] = params->global_work_size[0];
11297  size[1] = params->global_work_size[1];
11298  size[2] = params->global_work_size[2];
11299  if (temp_sz > 3){
11300  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11301  }
11302  }
11303  }
11304 
11305  cl_event gpuExec;
11306 
11307  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11308 
11309  clWaitForEvents(1, &gpuExec);
11310 
11311  char *result = (char *) malloc(typesz);
11312  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11313 
11314  v->assign(result, result+sz);
11315 
11316  if (typesz2 != typesz or sz != sz2){
11317  double *result2;
11318  result2 = (double *) malloc(typesz2);
11319  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11320 
11321  v2->assign(result2, result2+sz2);
11322  free(result2);
11323  }
11324  else{
11325  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11326 
11327  v2->assign(result, result+sz2);
11328  }
11329 
11330  clReleaseCommandQueue (queue);
11331  clReleaseMemObject(buffer);
11332  clReleaseMemObject(buffer2);
11333  clReleaseMemObject(buffer3);
11334  clReleaseEvent(gpuExec);
11335  free(result);
11336  }
11337 
11338  void ROS_OpenCL::process(std::vector<char>* v, std::vector<double>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
11339  size_t sz = v->size();
11340  size_t sz2 = v2->size();
11341  size_t sz3 = v3->size();
11342  size_t typesz = sizeof(char) * sz;
11343  size_t typesz2 = sizeof(double) * sz2;
11344  size_t typesz3 = sizeof(double) * sz3;
11345  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11346  if (temp_sz > 0){
11347  if (temp_sz > 2){
11348  if (temp_sz > 3){
11349  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11350  }
11351  typesz = sizeof(char) * params->buffers_size[0];
11352  typesz2 = sizeof(double) * params->buffers_size[1];
11353  typesz3 = sizeof(double) * params->buffers_size[2];
11354  }
11355  else{
11356  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11357  }
11358  }
11359  cl_int error = 0;
11360  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11361  checkError(error);
11362  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11363  checkError(error);
11364  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
11365  checkError(error);
11366  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11367  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11368  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11369  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11370 
11371  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11372  checkError (error);
11373  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11374  checkError (error);
11375  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
11376  checkError (error);
11377 
11378  size_t size[3] = {sz, sz2, sz3};
11379  size_t work_dimension = 3;
11380 
11381  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11382  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11383  work_dimension = 1;
11384  }
11385  else if(temp_sz > 0){
11386  if (params->multi_dimensional){
11387  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11388  For default multidimensional global work size, leave the global_work_size vector empty, \
11389  and set multi_dimensional to true. Setting the global work size based on the values inside \
11390  the global_work_size vector.");
11391  }
11392  if (temp_sz == 1){
11393  size[0] = params->global_work_size[0];
11394  work_dimension = 1;
11395  }
11396  else if (temp_sz == 2){
11397  size[0] = params->global_work_size[0];
11398  size[1] = params->global_work_size[1];
11399  work_dimension = 2;
11400  }
11401  else{
11402  size[0] = params->global_work_size[0];
11403  size[1] = params->global_work_size[1];
11404  size[2] = params->global_work_size[2];
11405  if (temp_sz > 3){
11406  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11407  }
11408  }
11409  }
11410 
11411  cl_event gpuExec;
11412 
11413  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11414 
11415  clWaitForEvents(1, &gpuExec);
11416 
11417  char *result = (char *) malloc(typesz);
11418  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11419 
11420  v->assign(result, result+sz);
11421 
11422  if (typesz2 != typesz or sz != sz2){
11423  double *result2;
11424  result2 = (double *) malloc(typesz2);
11425  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11426 
11427  v2->assign(result2, result2+sz2);
11428  free(result2);
11429  }
11430  else{
11431  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11432 
11433  v2->assign(result, result+sz2);
11434  }
11435 
11436  if (typesz3 != typesz or sz != sz3){
11437  double *result3;
11438  result3 = (double *) malloc(typesz3);
11439  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11440 
11441  v3->assign(result3, result3+sz3);
11442  free(result3);
11443  }
11444  else{
11445  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11446 
11447  v3->assign(result, result+sz3);
11448  }
11449 
11450  clReleaseCommandQueue (queue);
11451  clReleaseMemObject(buffer);
11452  clReleaseMemObject(buffer2);
11453  clReleaseMemObject(buffer3);
11454  clReleaseEvent(gpuExec);
11455  free(result);
11456  }
11457 
11458 
11459  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
11460  size_t sz = v.size();
11461  size_t sz2 = v2.size();
11462  size_t sz3 = v3.size();
11463  size_t typesz = sizeof(int) * sz;
11464  size_t typesz2 = sizeof(char) * sz2;
11465  size_t typesz3 = sizeof(char) * sz3;
11466  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11467  if (temp_sz > 0){
11468  if (temp_sz > 2){
11469  if (temp_sz > 3){
11470  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11471  }
11472  typesz = sizeof(int) * params->buffers_size[0];
11473  typesz2 = sizeof(char) * params->buffers_size[1];
11474  typesz3 = sizeof(char) * params->buffers_size[2];
11475  }
11476  else{
11477  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11478  }
11479  }
11480  cl_int error = 0;
11481  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11482  checkError(error);
11483  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11484  checkError(error);
11485  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11486  checkError(error);
11487  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11488  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11489  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11490  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11491 
11492  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11493  checkError (error);
11494  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11495  checkError (error);
11496  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11497  checkError (error);
11498 
11499  size_t size[3] = {sz, sz2, sz3};
11500  size_t work_dimension = 3;
11501 
11502  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11503  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11504  work_dimension = 1;
11505  }
11506  else if(temp_sz > 0){
11507  if (params->multi_dimensional){
11508  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11509  For default multidimensional global work size, leave the global_work_size vector empty, \
11510  and set multi_dimensional to true. Setting the global work size based on the values inside \
11511  the global_work_size vector.");
11512  }
11513  if (temp_sz == 1){
11514  size[0] = params->global_work_size[0];
11515  work_dimension = 1;
11516  }
11517  else if (temp_sz == 2){
11518  size[0] = params->global_work_size[0];
11519  size[1] = params->global_work_size[1];
11520  work_dimension = 2;
11521  }
11522  else{
11523  size[0] = params->global_work_size[0];
11524  size[1] = params->global_work_size[1];
11525  size[2] = params->global_work_size[2];
11526  if (temp_sz > 3){
11527  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11528  }
11529  }
11530  }
11531 
11532  cl_event gpuExec;
11533 
11534  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11535 
11536  clWaitForEvents(1, &gpuExec);
11537 
11538  int *result = (int *) malloc(typesz);
11539  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11540 
11541  std::vector<int> res = std::vector<int>();
11542  res.assign(result, result+sz);
11543 
11544  clReleaseCommandQueue (queue);
11545  clReleaseMemObject(buffer);
11546  clReleaseMemObject(buffer2);
11547  clReleaseMemObject(buffer3);
11548  clReleaseEvent(gpuExec);
11549  free(result);
11550 
11551  return res;
11552  }
11553 
11554  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
11555  size_t sz = v->size();
11556  size_t sz2 = v2.size();
11557  size_t sz3 = v3.size();
11558  size_t typesz = sizeof(int) * sz;
11559  size_t typesz2 = sizeof(char) * sz2;
11560  size_t typesz3 = sizeof(char) * sz3;
11561  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11562  if (temp_sz > 0){
11563  if (temp_sz > 2){
11564  if (temp_sz > 3){
11565  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11566  }
11567  typesz = sizeof(int) * params->buffers_size[0];
11568  typesz2 = sizeof(char) * params->buffers_size[1];
11569  typesz3 = sizeof(char) * params->buffers_size[2];
11570  }
11571  else{
11572  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11573  }
11574  }
11575  cl_int error = 0;
11576  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11577  checkError(error);
11578  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11579  checkError(error);
11580  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11581  checkError(error);
11582  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11583  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11584  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11585  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11586 
11587  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11588  checkError (error);
11589  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11590  checkError (error);
11591  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11592  checkError (error);
11593 
11594  size_t size[3] = {sz, sz2, sz3};
11595  size_t work_dimension = 3;
11596 
11597  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11598  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11599  work_dimension = 1;
11600  }
11601  else if(temp_sz > 0){
11602  if (params->multi_dimensional){
11603  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11604  For default multidimensional global work size, leave the global_work_size vector empty, \
11605  and set multi_dimensional to true. Setting the global work size based on the values inside \
11606  the global_work_size vector.");
11607  }
11608  if (temp_sz == 1){
11609  size[0] = params->global_work_size[0];
11610  work_dimension = 1;
11611  }
11612  else if (temp_sz == 2){
11613  size[0] = params->global_work_size[0];
11614  size[1] = params->global_work_size[1];
11615  work_dimension = 2;
11616  }
11617  else{
11618  size[0] = params->global_work_size[0];
11619  size[1] = params->global_work_size[1];
11620  size[2] = params->global_work_size[2];
11621  if (temp_sz > 3){
11622  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11623  }
11624  }
11625  }
11626 
11627  cl_event gpuExec;
11628 
11629  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11630 
11631  clWaitForEvents(1, &gpuExec);
11632 
11633  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11634 
11635  clWaitForEvents(1, &gpuExec);
11636 
11637  int *result = (int *) malloc(typesz);
11638  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11639 
11640  v->assign(result, result+sz);
11641 
11642  clReleaseCommandQueue (queue);
11643  clReleaseMemObject(buffer);
11644  clReleaseMemObject(buffer2);
11645  clReleaseMemObject(buffer3);
11646  clReleaseEvent(gpuExec);
11647  free(result);
11648  }
11649 
11650  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
11651  size_t sz = v->size();
11652  size_t sz2 = v2->size();
11653  size_t sz3 = v3.size();
11654  size_t typesz = sizeof(int) * sz;
11655  size_t typesz2 = sizeof(char) * sz2;
11656  size_t typesz3 = sizeof(char) * sz3;
11657  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11658  if (temp_sz > 0){
11659  if (temp_sz > 2){
11660  if (temp_sz > 3){
11661  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11662  }
11663  typesz = sizeof(int) * params->buffers_size[0];
11664  typesz2 = sizeof(char) * params->buffers_size[1];
11665  typesz3 = sizeof(char) * params->buffers_size[2];
11666  }
11667  else{
11668  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11669  }
11670  }
11671  cl_int error = 0;
11672  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11673  checkError(error);
11674  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11675  checkError(error);
11676  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11677  checkError(error);
11678  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11679  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11680  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11681  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11682 
11683  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11684  checkError (error);
11685  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11686  checkError (error);
11687  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11688  checkError (error);
11689 
11690  size_t size[3] = {sz, sz2, sz3};
11691  size_t work_dimension = 3;
11692 
11693  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11694  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11695  work_dimension = 1;
11696  }
11697  else if(temp_sz > 0){
11698  if (params->multi_dimensional){
11699  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11700  For default multidimensional global work size, leave the global_work_size vector empty, \
11701  and set multi_dimensional to true. Setting the global work size based on the values inside \
11702  the global_work_size vector.");
11703  }
11704  if (temp_sz == 1){
11705  size[0] = params->global_work_size[0];
11706  work_dimension = 1;
11707  }
11708  else if (temp_sz == 2){
11709  size[0] = params->global_work_size[0];
11710  size[1] = params->global_work_size[1];
11711  work_dimension = 2;
11712  }
11713  else{
11714  size[0] = params->global_work_size[0];
11715  size[1] = params->global_work_size[1];
11716  size[2] = params->global_work_size[2];
11717  if (temp_sz > 3){
11718  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11719  }
11720  }
11721  }
11722 
11723  cl_event gpuExec;
11724 
11725  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11726 
11727  clWaitForEvents(1, &gpuExec);
11728 
11729  int *result = (int *) malloc(typesz);
11730  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11731 
11732  v->assign(result, result+sz);
11733 
11734  if (typesz2 != typesz or sz != sz2){
11735  char *result2;
11736  result2 = (char *) malloc(typesz2);
11737  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11738 
11739  v2->assign(result2, result2+sz2);
11740  free(result2);
11741  }
11742  else{
11743  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11744 
11745  v2->assign(result, result+sz2);
11746  }
11747 
11748  clReleaseCommandQueue (queue);
11749  clReleaseMemObject(buffer);
11750  clReleaseMemObject(buffer2);
11751  clReleaseMemObject(buffer3);
11752  clReleaseEvent(gpuExec);
11753  free(result);
11754  }
11755 
11756  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
11757  size_t sz = v->size();
11758  size_t sz2 = v2->size();
11759  size_t sz3 = v3->size();
11760  size_t typesz = sizeof(int) * sz;
11761  size_t typesz2 = sizeof(char) * sz2;
11762  size_t typesz3 = sizeof(char) * sz3;
11763  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11764  if (temp_sz > 0){
11765  if (temp_sz > 2){
11766  if (temp_sz > 3){
11767  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11768  }
11769  typesz = sizeof(int) * params->buffers_size[0];
11770  typesz2 = sizeof(char) * params->buffers_size[1];
11771  typesz3 = sizeof(char) * params->buffers_size[2];
11772  }
11773  else{
11774  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11775  }
11776  }
11777  cl_int error = 0;
11778  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11779  checkError(error);
11780  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11781  checkError(error);
11782  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
11783  checkError(error);
11784  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11785  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11786  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11787  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11788 
11789  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11790  checkError (error);
11791  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11792  checkError (error);
11793  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
11794  checkError (error);
11795 
11796  size_t size[3] = {sz, sz2, sz3};
11797  size_t work_dimension = 3;
11798 
11799  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11800  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11801  work_dimension = 1;
11802  }
11803  else if(temp_sz > 0){
11804  if (params->multi_dimensional){
11805  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11806  For default multidimensional global work size, leave the global_work_size vector empty, \
11807  and set multi_dimensional to true. Setting the global work size based on the values inside \
11808  the global_work_size vector.");
11809  }
11810  if (temp_sz == 1){
11811  size[0] = params->global_work_size[0];
11812  work_dimension = 1;
11813  }
11814  else if (temp_sz == 2){
11815  size[0] = params->global_work_size[0];
11816  size[1] = params->global_work_size[1];
11817  work_dimension = 2;
11818  }
11819  else{
11820  size[0] = params->global_work_size[0];
11821  size[1] = params->global_work_size[1];
11822  size[2] = params->global_work_size[2];
11823  if (temp_sz > 3){
11824  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11825  }
11826  }
11827  }
11828 
11829  cl_event gpuExec;
11830 
11831  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11832 
11833  clWaitForEvents(1, &gpuExec);
11834 
11835  int *result = (int *) malloc(typesz);
11836  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11837 
11838  v->assign(result, result+sz);
11839 
11840  if (typesz2 != typesz or sz != sz2){
11841  char *result2;
11842  result2 = (char *) malloc(typesz2);
11843  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11844 
11845  v2->assign(result2, result2+sz2);
11846  free(result2);
11847  }
11848  else{
11849  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11850 
11851  v2->assign(result, result+sz2);
11852  }
11853 
11854  if (typesz3 != typesz or sz != sz3){
11855  char *result3;
11856  result3 = (char *) malloc(typesz3);
11857  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11858 
11859  v3->assign(result3, result3+sz3);
11860  free(result3);
11861  }
11862  else{
11863  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11864 
11865  v3->assign(result, result+sz3);
11866  }
11867 
11868  clReleaseCommandQueue (queue);
11869  clReleaseMemObject(buffer);
11870  clReleaseMemObject(buffer2);
11871  clReleaseMemObject(buffer3);
11872  clReleaseEvent(gpuExec);
11873  free(result);
11874  }
11875 
11876 
11877  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
11878  size_t sz = v.size();
11879  size_t sz2 = v2.size();
11880  size_t sz3 = v3.size();
11881  size_t typesz = sizeof(int) * sz;
11882  size_t typesz2 = sizeof(char) * sz2;
11883  size_t typesz3 = sizeof(int) * sz3;
11884  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11885  if (temp_sz > 0){
11886  if (temp_sz > 2){
11887  if (temp_sz > 3){
11888  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11889  }
11890  typesz = sizeof(int) * params->buffers_size[0];
11891  typesz2 = sizeof(char) * params->buffers_size[1];
11892  typesz3 = sizeof(int) * params->buffers_size[2];
11893  }
11894  else{
11895  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11896  }
11897  }
11898  cl_int error = 0;
11899  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11900  checkError(error);
11901  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11902  checkError(error);
11903  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11904  checkError(error);
11905  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
11906  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
11907  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
11908  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
11909 
11910  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11911  checkError (error);
11912  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11913  checkError (error);
11914  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11915  checkError (error);
11916 
11917  size_t size[3] = {sz, sz2, sz3};
11918  size_t work_dimension = 3;
11919 
11920  temp_sz = params != NULL ? params->global_work_size.size() : 0;
11921  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
11922  work_dimension = 1;
11923  }
11924  else if(temp_sz > 0){
11925  if (params->multi_dimensional){
11926  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
11927  For default multidimensional global work size, leave the global_work_size vector empty, \
11928  and set multi_dimensional to true. Setting the global work size based on the values inside \
11929  the global_work_size vector.");
11930  }
11931  if (temp_sz == 1){
11932  size[0] = params->global_work_size[0];
11933  work_dimension = 1;
11934  }
11935  else if (temp_sz == 2){
11936  size[0] = params->global_work_size[0];
11937  size[1] = params->global_work_size[1];
11938  work_dimension = 2;
11939  }
11940  else{
11941  size[0] = params->global_work_size[0];
11942  size[1] = params->global_work_size[1];
11943  size[2] = params->global_work_size[2];
11944  if (temp_sz > 3){
11945  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11946  }
11947  }
11948  }
11949 
11950  cl_event gpuExec;
11951 
11952  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11953 
11954  clWaitForEvents(1, &gpuExec);
11955 
11956  int *result = (int *) malloc(typesz);
11957  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11958 
11959  std::vector<int> res = std::vector<int>();
11960  res.assign(result, result+sz);
11961 
11962  clReleaseCommandQueue (queue);
11963  clReleaseMemObject(buffer);
11964  clReleaseMemObject(buffer2);
11965  clReleaseMemObject(buffer3);
11966  clReleaseEvent(gpuExec);
11967  free(result);
11968 
11969  return res;
11970  }
11971 
11972  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
11973  size_t sz = v->size();
11974  size_t sz2 = v2.size();
11975  size_t sz3 = v3.size();
11976  size_t typesz = sizeof(int) * sz;
11977  size_t typesz2 = sizeof(char) * sz2;
11978  size_t typesz3 = sizeof(int) * sz3;
11979  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
11980  if (temp_sz > 0){
11981  if (temp_sz > 2){
11982  if (temp_sz > 3){
11983  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11984  }
11985  typesz = sizeof(int) * params->buffers_size[0];
11986  typesz2 = sizeof(char) * params->buffers_size[1];
11987  typesz3 = sizeof(int) * params->buffers_size[2];
11988  }
11989  else{
11990  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11991  }
11992  }
11993  cl_int error = 0;
11994  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11995  checkError(error);
11996  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11997  checkError(error);
11998  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11999  checkError(error);
12000  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12001  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12002  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12003  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12004 
12005  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12006  checkError (error);
12007  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12008  checkError (error);
12009  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12010  checkError (error);
12011 
12012  size_t size[3] = {sz, sz2, sz3};
12013  size_t work_dimension = 3;
12014 
12015  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12016  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12017  work_dimension = 1;
12018  }
12019  else if(temp_sz > 0){
12020  if (params->multi_dimensional){
12021  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12022  For default multidimensional global work size, leave the global_work_size vector empty, \
12023  and set multi_dimensional to true. Setting the global work size based on the values inside \
12024  the global_work_size vector.");
12025  }
12026  if (temp_sz == 1){
12027  size[0] = params->global_work_size[0];
12028  work_dimension = 1;
12029  }
12030  else if (temp_sz == 2){
12031  size[0] = params->global_work_size[0];
12032  size[1] = params->global_work_size[1];
12033  work_dimension = 2;
12034  }
12035  else{
12036  size[0] = params->global_work_size[0];
12037  size[1] = params->global_work_size[1];
12038  size[2] = params->global_work_size[2];
12039  if (temp_sz > 3){
12040  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12041  }
12042  }
12043  }
12044 
12045  cl_event gpuExec;
12046 
12047  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12048 
12049  clWaitForEvents(1, &gpuExec);
12050 
12051  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12052 
12053  clWaitForEvents(1, &gpuExec);
12054 
12055  int *result = (int *) malloc(typesz);
12056  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12057 
12058  v->assign(result, result+sz);
12059 
12060  clReleaseCommandQueue (queue);
12061  clReleaseMemObject(buffer);
12062  clReleaseMemObject(buffer2);
12063  clReleaseMemObject(buffer3);
12064  clReleaseEvent(gpuExec);
12065  free(result);
12066  }
12067 
12068  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
12069  size_t sz = v->size();
12070  size_t sz2 = v2->size();
12071  size_t sz3 = v3.size();
12072  size_t typesz = sizeof(int) * sz;
12073  size_t typesz2 = sizeof(char) * sz2;
12074  size_t typesz3 = sizeof(int) * sz3;
12075  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12076  if (temp_sz > 0){
12077  if (temp_sz > 2){
12078  if (temp_sz > 3){
12079  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12080  }
12081  typesz = sizeof(int) * params->buffers_size[0];
12082  typesz2 = sizeof(char) * params->buffers_size[1];
12083  typesz3 = sizeof(int) * params->buffers_size[2];
12084  }
12085  else{
12086  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12087  }
12088  }
12089  cl_int error = 0;
12090  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12091  checkError(error);
12092  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12093  checkError(error);
12094  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12095  checkError(error);
12096  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12097  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12098  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12099  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12100 
12101  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12102  checkError (error);
12103  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12104  checkError (error);
12105  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12106  checkError (error);
12107 
12108  size_t size[3] = {sz, sz2, sz3};
12109  size_t work_dimension = 3;
12110 
12111  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12112  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12113  work_dimension = 1;
12114  }
12115  else if(temp_sz > 0){
12116  if (params->multi_dimensional){
12117  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12118  For default multidimensional global work size, leave the global_work_size vector empty, \
12119  and set multi_dimensional to true. Setting the global work size based on the values inside \
12120  the global_work_size vector.");
12121  }
12122  if (temp_sz == 1){
12123  size[0] = params->global_work_size[0];
12124  work_dimension = 1;
12125  }
12126  else if (temp_sz == 2){
12127  size[0] = params->global_work_size[0];
12128  size[1] = params->global_work_size[1];
12129  work_dimension = 2;
12130  }
12131  else{
12132  size[0] = params->global_work_size[0];
12133  size[1] = params->global_work_size[1];
12134  size[2] = params->global_work_size[2];
12135  if (temp_sz > 3){
12136  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12137  }
12138  }
12139  }
12140 
12141  cl_event gpuExec;
12142 
12143  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12144 
12145  clWaitForEvents(1, &gpuExec);
12146 
12147  int *result = (int *) malloc(typesz);
12148  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12149 
12150  v->assign(result, result+sz);
12151 
12152  if (typesz2 != typesz or sz != sz2){
12153  char *result2;
12154  result2 = (char *) malloc(typesz2);
12155  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12156 
12157  v2->assign(result2, result2+sz2);
12158  free(result2);
12159  }
12160  else{
12161  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12162 
12163  v2->assign(result, result+sz2);
12164  }
12165 
12166  clReleaseCommandQueue (queue);
12167  clReleaseMemObject(buffer);
12168  clReleaseMemObject(buffer2);
12169  clReleaseMemObject(buffer3);
12170  clReleaseEvent(gpuExec);
12171  free(result);
12172  }
12173 
12174  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
12175  size_t sz = v->size();
12176  size_t sz2 = v2->size();
12177  size_t sz3 = v3->size();
12178  size_t typesz = sizeof(int) * sz;
12179  size_t typesz2 = sizeof(char) * sz2;
12180  size_t typesz3 = sizeof(int) * sz3;
12181  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12182  if (temp_sz > 0){
12183  if (temp_sz > 2){
12184  if (temp_sz > 3){
12185  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12186  }
12187  typesz = sizeof(int) * params->buffers_size[0];
12188  typesz2 = sizeof(char) * params->buffers_size[1];
12189  typesz3 = sizeof(int) * params->buffers_size[2];
12190  }
12191  else{
12192  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12193  }
12194  }
12195  cl_int error = 0;
12196  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12197  checkError(error);
12198  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12199  checkError(error);
12200  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
12201  checkError(error);
12202  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12203  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12204  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12205  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12206 
12207  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12208  checkError (error);
12209  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12210  checkError (error);
12211  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
12212  checkError (error);
12213 
12214  size_t size[3] = {sz, sz2, sz3};
12215  size_t work_dimension = 3;
12216 
12217  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12218  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12219  work_dimension = 1;
12220  }
12221  else if(temp_sz > 0){
12222  if (params->multi_dimensional){
12223  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12224  For default multidimensional global work size, leave the global_work_size vector empty, \
12225  and set multi_dimensional to true. Setting the global work size based on the values inside \
12226  the global_work_size vector.");
12227  }
12228  if (temp_sz == 1){
12229  size[0] = params->global_work_size[0];
12230  work_dimension = 1;
12231  }
12232  else if (temp_sz == 2){
12233  size[0] = params->global_work_size[0];
12234  size[1] = params->global_work_size[1];
12235  work_dimension = 2;
12236  }
12237  else{
12238  size[0] = params->global_work_size[0];
12239  size[1] = params->global_work_size[1];
12240  size[2] = params->global_work_size[2];
12241  if (temp_sz > 3){
12242  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12243  }
12244  }
12245  }
12246 
12247  cl_event gpuExec;
12248 
12249  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12250 
12251  clWaitForEvents(1, &gpuExec);
12252 
12253  int *result = (int *) malloc(typesz);
12254  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12255 
12256  v->assign(result, result+sz);
12257 
12258  if (typesz2 != typesz or sz != sz2){
12259  char *result2;
12260  result2 = (char *) malloc(typesz2);
12261  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12262 
12263  v2->assign(result2, result2+sz2);
12264  free(result2);
12265  }
12266  else{
12267  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12268 
12269  v2->assign(result, result+sz2);
12270  }
12271 
12272  if (typesz3 != typesz or sz != sz3){
12273  int *result3;
12274  result3 = (int *) malloc(typesz3);
12275  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
12276 
12277  v3->assign(result3, result3+sz3);
12278  free(result3);
12279  }
12280  else{
12281  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12282 
12283  v3->assign(result, result+sz3);
12284  }
12285 
12286  clReleaseCommandQueue (queue);
12287  clReleaseMemObject(buffer);
12288  clReleaseMemObject(buffer2);
12289  clReleaseMemObject(buffer3);
12290  clReleaseEvent(gpuExec);
12291  free(result);
12292  }
12293 
12294 
12295  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
12296  size_t sz = v.size();
12297  size_t sz2 = v2.size();
12298  size_t sz3 = v3.size();
12299  size_t typesz = sizeof(int) * sz;
12300  size_t typesz2 = sizeof(char) * sz2;
12301  size_t typesz3 = sizeof(float) * sz3;
12302  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12303  if (temp_sz > 0){
12304  if (temp_sz > 2){
12305  if (temp_sz > 3){
12306  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12307  }
12308  typesz = sizeof(int) * params->buffers_size[0];
12309  typesz2 = sizeof(char) * params->buffers_size[1];
12310  typesz3 = sizeof(float) * params->buffers_size[2];
12311  }
12312  else{
12313  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12314  }
12315  }
12316  cl_int error = 0;
12317  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12318  checkError(error);
12319  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12320  checkError(error);
12321  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12322  checkError(error);
12323  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12324  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12325  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12326  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12327 
12328  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
12329  checkError (error);
12330  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12331  checkError (error);
12332  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12333  checkError (error);
12334 
12335  size_t size[3] = {sz, sz2, sz3};
12336  size_t work_dimension = 3;
12337 
12338  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12339  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12340  work_dimension = 1;
12341  }
12342  else if(temp_sz > 0){
12343  if (params->multi_dimensional){
12344  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12345  For default multidimensional global work size, leave the global_work_size vector empty, \
12346  and set multi_dimensional to true. Setting the global work size based on the values inside \
12347  the global_work_size vector.");
12348  }
12349  if (temp_sz == 1){
12350  size[0] = params->global_work_size[0];
12351  work_dimension = 1;
12352  }
12353  else if (temp_sz == 2){
12354  size[0] = params->global_work_size[0];
12355  size[1] = params->global_work_size[1];
12356  work_dimension = 2;
12357  }
12358  else{
12359  size[0] = params->global_work_size[0];
12360  size[1] = params->global_work_size[1];
12361  size[2] = params->global_work_size[2];
12362  if (temp_sz > 3){
12363  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12364  }
12365  }
12366  }
12367 
12368  cl_event gpuExec;
12369 
12370  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12371 
12372  clWaitForEvents(1, &gpuExec);
12373 
12374  int *result = (int *) malloc(typesz);
12375  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12376 
12377  std::vector<int> res = std::vector<int>();
12378  res.assign(result, result+sz);
12379 
12380  clReleaseCommandQueue (queue);
12381  clReleaseMemObject(buffer);
12382  clReleaseMemObject(buffer2);
12383  clReleaseMemObject(buffer3);
12384  clReleaseEvent(gpuExec);
12385  free(result);
12386 
12387  return res;
12388  }
12389 
12390  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
12391  size_t sz = v->size();
12392  size_t sz2 = v2.size();
12393  size_t sz3 = v3.size();
12394  size_t typesz = sizeof(int) * sz;
12395  size_t typesz2 = sizeof(char) * sz2;
12396  size_t typesz3 = sizeof(float) * sz3;
12397  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12398  if (temp_sz > 0){
12399  if (temp_sz > 2){
12400  if (temp_sz > 3){
12401  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12402  }
12403  typesz = sizeof(int) * params->buffers_size[0];
12404  typesz2 = sizeof(char) * params->buffers_size[1];
12405  typesz3 = sizeof(float) * params->buffers_size[2];
12406  }
12407  else{
12408  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12409  }
12410  }
12411  cl_int error = 0;
12412  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12413  checkError(error);
12414  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12415  checkError(error);
12416  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12417  checkError(error);
12418  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12419  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12420  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12421  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12422 
12423  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12424  checkError (error);
12425  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12426  checkError (error);
12427  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12428  checkError (error);
12429 
12430  size_t size[3] = {sz, sz2, sz3};
12431  size_t work_dimension = 3;
12432 
12433  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12434  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12435  work_dimension = 1;
12436  }
12437  else if(temp_sz > 0){
12438  if (params->multi_dimensional){
12439  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12440  For default multidimensional global work size, leave the global_work_size vector empty, \
12441  and set multi_dimensional to true. Setting the global work size based on the values inside \
12442  the global_work_size vector.");
12443  }
12444  if (temp_sz == 1){
12445  size[0] = params->global_work_size[0];
12446  work_dimension = 1;
12447  }
12448  else if (temp_sz == 2){
12449  size[0] = params->global_work_size[0];
12450  size[1] = params->global_work_size[1];
12451  work_dimension = 2;
12452  }
12453  else{
12454  size[0] = params->global_work_size[0];
12455  size[1] = params->global_work_size[1];
12456  size[2] = params->global_work_size[2];
12457  if (temp_sz > 3){
12458  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12459  }
12460  }
12461  }
12462 
12463  cl_event gpuExec;
12464 
12465  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12466 
12467  clWaitForEvents(1, &gpuExec);
12468 
12469  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12470 
12471  clWaitForEvents(1, &gpuExec);
12472 
12473  int *result = (int *) malloc(typesz);
12474  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12475 
12476  v->assign(result, result+sz);
12477 
12478  clReleaseCommandQueue (queue);
12479  clReleaseMemObject(buffer);
12480  clReleaseMemObject(buffer2);
12481  clReleaseMemObject(buffer3);
12482  clReleaseEvent(gpuExec);
12483  free(result);
12484  }
12485 
12486  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
12487  size_t sz = v->size();
12488  size_t sz2 = v2->size();
12489  size_t sz3 = v3.size();
12490  size_t typesz = sizeof(int) * sz;
12491  size_t typesz2 = sizeof(char) * sz2;
12492  size_t typesz3 = sizeof(float) * sz3;
12493  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12494  if (temp_sz > 0){
12495  if (temp_sz > 2){
12496  if (temp_sz > 3){
12497  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12498  }
12499  typesz = sizeof(int) * params->buffers_size[0];
12500  typesz2 = sizeof(char) * params->buffers_size[1];
12501  typesz3 = sizeof(float) * params->buffers_size[2];
12502  }
12503  else{
12504  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12505  }
12506  }
12507  cl_int error = 0;
12508  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12509  checkError(error);
12510  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12511  checkError(error);
12512  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12513  checkError(error);
12514  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12515  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12516  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12517  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12518 
12519  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12520  checkError (error);
12521  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12522  checkError (error);
12523  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12524  checkError (error);
12525 
12526  size_t size[3] = {sz, sz2, sz3};
12527  size_t work_dimension = 3;
12528 
12529  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12530  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12531  work_dimension = 1;
12532  }
12533  else if(temp_sz > 0){
12534  if (params->multi_dimensional){
12535  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12536  For default multidimensional global work size, leave the global_work_size vector empty, \
12537  and set multi_dimensional to true. Setting the global work size based on the values inside \
12538  the global_work_size vector.");
12539  }
12540  if (temp_sz == 1){
12541  size[0] = params->global_work_size[0];
12542  work_dimension = 1;
12543  }
12544  else if (temp_sz == 2){
12545  size[0] = params->global_work_size[0];
12546  size[1] = params->global_work_size[1];
12547  work_dimension = 2;
12548  }
12549  else{
12550  size[0] = params->global_work_size[0];
12551  size[1] = params->global_work_size[1];
12552  size[2] = params->global_work_size[2];
12553  if (temp_sz > 3){
12554  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12555  }
12556  }
12557  }
12558 
12559  cl_event gpuExec;
12560 
12561  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12562 
12563  clWaitForEvents(1, &gpuExec);
12564 
12565  int *result = (int *) malloc(typesz);
12566  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12567 
12568  v->assign(result, result+sz);
12569 
12570  if (typesz2 != typesz or sz != sz2){
12571  char *result2;
12572  result2 = (char *) malloc(typesz2);
12573  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12574 
12575  v2->assign(result2, result2+sz2);
12576  free(result2);
12577  }
12578  else{
12579  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12580 
12581  v2->assign(result, result+sz2);
12582  }
12583 
12584  clReleaseCommandQueue (queue);
12585  clReleaseMemObject(buffer);
12586  clReleaseMemObject(buffer2);
12587  clReleaseMemObject(buffer3);
12588  clReleaseEvent(gpuExec);
12589  free(result);
12590  }
12591 
12592  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
12593  size_t sz = v->size();
12594  size_t sz2 = v2->size();
12595  size_t sz3 = v3->size();
12596  size_t typesz = sizeof(int) * sz;
12597  size_t typesz2 = sizeof(char) * sz2;
12598  size_t typesz3 = sizeof(float) * sz3;
12599  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12600  if (temp_sz > 0){
12601  if (temp_sz > 2){
12602  if (temp_sz > 3){
12603  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12604  }
12605  typesz = sizeof(int) * params->buffers_size[0];
12606  typesz2 = sizeof(char) * params->buffers_size[1];
12607  typesz3 = sizeof(float) * params->buffers_size[2];
12608  }
12609  else{
12610  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12611  }
12612  }
12613  cl_int error = 0;
12614  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12615  checkError(error);
12616  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12617  checkError(error);
12618  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
12619  checkError(error);
12620  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12621  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12622  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12623  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12624 
12625  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12626  checkError (error);
12627  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12628  checkError (error);
12629  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
12630  checkError (error);
12631 
12632  size_t size[3] = {sz, sz2, sz3};
12633  size_t work_dimension = 3;
12634 
12635  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12636  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12637  work_dimension = 1;
12638  }
12639  else if(temp_sz > 0){
12640  if (params->multi_dimensional){
12641  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12642  For default multidimensional global work size, leave the global_work_size vector empty, \
12643  and set multi_dimensional to true. Setting the global work size based on the values inside \
12644  the global_work_size vector.");
12645  }
12646  if (temp_sz == 1){
12647  size[0] = params->global_work_size[0];
12648  work_dimension = 1;
12649  }
12650  else if (temp_sz == 2){
12651  size[0] = params->global_work_size[0];
12652  size[1] = params->global_work_size[1];
12653  work_dimension = 2;
12654  }
12655  else{
12656  size[0] = params->global_work_size[0];
12657  size[1] = params->global_work_size[1];
12658  size[2] = params->global_work_size[2];
12659  if (temp_sz > 3){
12660  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12661  }
12662  }
12663  }
12664 
12665  cl_event gpuExec;
12666 
12667  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12668 
12669  clWaitForEvents(1, &gpuExec);
12670 
12671  int *result = (int *) malloc(typesz);
12672  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12673 
12674  v->assign(result, result+sz);
12675 
12676  if (typesz2 != typesz or sz != sz2){
12677  char *result2;
12678  result2 = (char *) malloc(typesz2);
12679  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12680 
12681  v2->assign(result2, result2+sz2);
12682  free(result2);
12683  }
12684  else{
12685  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12686 
12687  v2->assign(result, result+sz2);
12688  }
12689 
12690  if (typesz3 != typesz or sz != sz3){
12691  float *result3;
12692  result3 = (float *) malloc(typesz3);
12693  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
12694 
12695  v3->assign(result3, result3+sz3);
12696  free(result3);
12697  }
12698  else{
12699  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12700 
12701  v3->assign(result, result+sz3);
12702  }
12703 
12704  clReleaseCommandQueue (queue);
12705  clReleaseMemObject(buffer);
12706  clReleaseMemObject(buffer2);
12707  clReleaseMemObject(buffer3);
12708  clReleaseEvent(gpuExec);
12709  free(result);
12710  }
12711 
12712 
12713  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
12714  size_t sz = v.size();
12715  size_t sz2 = v2.size();
12716  size_t sz3 = v3.size();
12717  size_t typesz = sizeof(int) * sz;
12718  size_t typesz2 = sizeof(char) * sz2;
12719  size_t typesz3 = sizeof(double) * sz3;
12720  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12721  if (temp_sz > 0){
12722  if (temp_sz > 2){
12723  if (temp_sz > 3){
12724  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12725  }
12726  typesz = sizeof(int) * params->buffers_size[0];
12727  typesz2 = sizeof(char) * params->buffers_size[1];
12728  typesz3 = sizeof(double) * params->buffers_size[2];
12729  }
12730  else{
12731  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12732  }
12733  }
12734  cl_int error = 0;
12735  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12736  checkError(error);
12737  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12738  checkError(error);
12739  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12740  checkError(error);
12741  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12742  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12743  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12744  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12745 
12746  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
12747  checkError (error);
12748  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12749  checkError (error);
12750  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12751  checkError (error);
12752 
12753  size_t size[3] = {sz, sz2, sz3};
12754  size_t work_dimension = 3;
12755 
12756  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12757  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12758  work_dimension = 1;
12759  }
12760  else if(temp_sz > 0){
12761  if (params->multi_dimensional){
12762  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12763  For default multidimensional global work size, leave the global_work_size vector empty, \
12764  and set multi_dimensional to true. Setting the global work size based on the values inside \
12765  the global_work_size vector.");
12766  }
12767  if (temp_sz == 1){
12768  size[0] = params->global_work_size[0];
12769  work_dimension = 1;
12770  }
12771  else if (temp_sz == 2){
12772  size[0] = params->global_work_size[0];
12773  size[1] = params->global_work_size[1];
12774  work_dimension = 2;
12775  }
12776  else{
12777  size[0] = params->global_work_size[0];
12778  size[1] = params->global_work_size[1];
12779  size[2] = params->global_work_size[2];
12780  if (temp_sz > 3){
12781  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12782  }
12783  }
12784  }
12785 
12786  cl_event gpuExec;
12787 
12788  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12789 
12790  clWaitForEvents(1, &gpuExec);
12791 
12792  int *result = (int *) malloc(typesz);
12793  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12794 
12795  std::vector<int> res = std::vector<int>();
12796  res.assign(result, result+sz);
12797 
12798  clReleaseCommandQueue (queue);
12799  clReleaseMemObject(buffer);
12800  clReleaseMemObject(buffer2);
12801  clReleaseMemObject(buffer3);
12802  clReleaseEvent(gpuExec);
12803  free(result);
12804 
12805  return res;
12806  }
12807 
12808  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
12809  size_t sz = v->size();
12810  size_t sz2 = v2.size();
12811  size_t sz3 = v3.size();
12812  size_t typesz = sizeof(int) * sz;
12813  size_t typesz2 = sizeof(char) * sz2;
12814  size_t typesz3 = sizeof(double) * sz3;
12815  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12816  if (temp_sz > 0){
12817  if (temp_sz > 2){
12818  if (temp_sz > 3){
12819  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12820  }
12821  typesz = sizeof(int) * params->buffers_size[0];
12822  typesz2 = sizeof(char) * params->buffers_size[1];
12823  typesz3 = sizeof(double) * params->buffers_size[2];
12824  }
12825  else{
12826  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12827  }
12828  }
12829  cl_int error = 0;
12830  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12831  checkError(error);
12832  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12833  checkError(error);
12834  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12835  checkError(error);
12836  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12837  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12838  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12839  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12840 
12841  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12842  checkError (error);
12843  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12844  checkError (error);
12845  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12846  checkError (error);
12847 
12848  size_t size[3] = {sz, sz2, sz3};
12849  size_t work_dimension = 3;
12850 
12851  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12852  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12853  work_dimension = 1;
12854  }
12855  else if(temp_sz > 0){
12856  if (params->multi_dimensional){
12857  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12858  For default multidimensional global work size, leave the global_work_size vector empty, \
12859  and set multi_dimensional to true. Setting the global work size based on the values inside \
12860  the global_work_size vector.");
12861  }
12862  if (temp_sz == 1){
12863  size[0] = params->global_work_size[0];
12864  work_dimension = 1;
12865  }
12866  else if (temp_sz == 2){
12867  size[0] = params->global_work_size[0];
12868  size[1] = params->global_work_size[1];
12869  work_dimension = 2;
12870  }
12871  else{
12872  size[0] = params->global_work_size[0];
12873  size[1] = params->global_work_size[1];
12874  size[2] = params->global_work_size[2];
12875  if (temp_sz > 3){
12876  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12877  }
12878  }
12879  }
12880 
12881  cl_event gpuExec;
12882 
12883  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12884 
12885  clWaitForEvents(1, &gpuExec);
12886 
12887  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12888 
12889  clWaitForEvents(1, &gpuExec);
12890 
12891  int *result = (int *) malloc(typesz);
12892  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12893 
12894  v->assign(result, result+sz);
12895 
12896  clReleaseCommandQueue (queue);
12897  clReleaseMemObject(buffer);
12898  clReleaseMemObject(buffer2);
12899  clReleaseMemObject(buffer3);
12900  clReleaseEvent(gpuExec);
12901  free(result);
12902  }
12903 
12904  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
12905  size_t sz = v->size();
12906  size_t sz2 = v2->size();
12907  size_t sz3 = v3.size();
12908  size_t typesz = sizeof(int) * sz;
12909  size_t typesz2 = sizeof(char) * sz2;
12910  size_t typesz3 = sizeof(double) * sz3;
12911  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
12912  if (temp_sz > 0){
12913  if (temp_sz > 2){
12914  if (temp_sz > 3){
12915  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12916  }
12917  typesz = sizeof(int) * params->buffers_size[0];
12918  typesz2 = sizeof(char) * params->buffers_size[1];
12919  typesz3 = sizeof(double) * params->buffers_size[2];
12920  }
12921  else{
12922  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
12923  }
12924  }
12925  cl_int error = 0;
12926  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12927  checkError(error);
12928  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12929  checkError(error);
12930  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
12931  checkError(error);
12932  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
12933  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
12934  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
12935  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
12936 
12937  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12938  checkError (error);
12939  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12940  checkError (error);
12941  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
12942  checkError (error);
12943 
12944  size_t size[3] = {sz, sz2, sz3};
12945  size_t work_dimension = 3;
12946 
12947  temp_sz = params != NULL ? params->global_work_size.size() : 0;
12948  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
12949  work_dimension = 1;
12950  }
12951  else if(temp_sz > 0){
12952  if (params->multi_dimensional){
12953  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
12954  For default multidimensional global work size, leave the global_work_size vector empty, \
12955  and set multi_dimensional to true. Setting the global work size based on the values inside \
12956  the global_work_size vector.");
12957  }
12958  if (temp_sz == 1){
12959  size[0] = params->global_work_size[0];
12960  work_dimension = 1;
12961  }
12962  else if (temp_sz == 2){
12963  size[0] = params->global_work_size[0];
12964  size[1] = params->global_work_size[1];
12965  work_dimension = 2;
12966  }
12967  else{
12968  size[0] = params->global_work_size[0];
12969  size[1] = params->global_work_size[1];
12970  size[2] = params->global_work_size[2];
12971  if (temp_sz > 3){
12972  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
12973  }
12974  }
12975  }
12976 
12977  cl_event gpuExec;
12978 
12979  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12980 
12981  clWaitForEvents(1, &gpuExec);
12982 
12983  int *result = (int *) malloc(typesz);
12984  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12985 
12986  v->assign(result, result+sz);
12987 
12988  if (typesz2 != typesz or sz != sz2){
12989  char *result2;
12990  result2 = (char *) malloc(typesz2);
12991  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12992 
12993  v2->assign(result2, result2+sz2);
12994  free(result2);
12995  }
12996  else{
12997  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12998 
12999  v2->assign(result, result+sz2);
13000  }
13001 
13002  clReleaseCommandQueue (queue);
13003  clReleaseMemObject(buffer);
13004  clReleaseMemObject(buffer2);
13005  clReleaseMemObject(buffer3);
13006  clReleaseEvent(gpuExec);
13007  free(result);
13008  }
13009 
13010  void ROS_OpenCL::process(std::vector<int>* v, std::vector<char>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
13011  size_t sz = v->size();
13012  size_t sz2 = v2->size();
13013  size_t sz3 = v3->size();
13014  size_t typesz = sizeof(int) * sz;
13015  size_t typesz2 = sizeof(char) * sz2;
13016  size_t typesz3 = sizeof(double) * sz3;
13017  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13018  if (temp_sz > 0){
13019  if (temp_sz > 2){
13020  if (temp_sz > 3){
13021  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13022  }
13023  typesz = sizeof(int) * params->buffers_size[0];
13024  typesz2 = sizeof(char) * params->buffers_size[1];
13025  typesz3 = sizeof(double) * params->buffers_size[2];
13026  }
13027  else{
13028  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13029  }
13030  }
13031  cl_int error = 0;
13032  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13033  checkError(error);
13034  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13035  checkError(error);
13036  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
13037  checkError(error);
13038  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13039  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13040  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13041  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13042 
13043  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13044  checkError (error);
13045  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13046  checkError (error);
13047  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
13048  checkError (error);
13049 
13050  size_t size[3] = {sz, sz2, sz3};
13051  size_t work_dimension = 3;
13052 
13053  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13054  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13055  work_dimension = 1;
13056  }
13057  else if(temp_sz > 0){
13058  if (params->multi_dimensional){
13059  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13060  For default multidimensional global work size, leave the global_work_size vector empty, \
13061  and set multi_dimensional to true. Setting the global work size based on the values inside \
13062  the global_work_size vector.");
13063  }
13064  if (temp_sz == 1){
13065  size[0] = params->global_work_size[0];
13066  work_dimension = 1;
13067  }
13068  else if (temp_sz == 2){
13069  size[0] = params->global_work_size[0];
13070  size[1] = params->global_work_size[1];
13071  work_dimension = 2;
13072  }
13073  else{
13074  size[0] = params->global_work_size[0];
13075  size[1] = params->global_work_size[1];
13076  size[2] = params->global_work_size[2];
13077  if (temp_sz > 3){
13078  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13079  }
13080  }
13081  }
13082 
13083  cl_event gpuExec;
13084 
13085  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13086 
13087  clWaitForEvents(1, &gpuExec);
13088 
13089  int *result = (int *) malloc(typesz);
13090  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13091 
13092  v->assign(result, result+sz);
13093 
13094  if (typesz2 != typesz or sz != sz2){
13095  char *result2;
13096  result2 = (char *) malloc(typesz2);
13097  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13098 
13099  v2->assign(result2, result2+sz2);
13100  free(result2);
13101  }
13102  else{
13103  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13104 
13105  v2->assign(result, result+sz2);
13106  }
13107 
13108  if (typesz3 != typesz or sz != sz3){
13109  double *result3;
13110  result3 = (double *) malloc(typesz3);
13111  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13112 
13113  v3->assign(result3, result3+sz3);
13114  free(result3);
13115  }
13116  else{
13117  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13118 
13119  v3->assign(result, result+sz3);
13120  }
13121 
13122  clReleaseCommandQueue (queue);
13123  clReleaseMemObject(buffer);
13124  clReleaseMemObject(buffer2);
13125  clReleaseMemObject(buffer3);
13126  clReleaseEvent(gpuExec);
13127  free(result);
13128  }
13129 
13130 
13131  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
13132  size_t sz = v.size();
13133  size_t sz2 = v2.size();
13134  size_t sz3 = v3.size();
13135  size_t typesz = sizeof(int) * sz;
13136  size_t typesz2 = sizeof(int) * sz2;
13137  size_t typesz3 = sizeof(char) * sz3;
13138  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13139  if (temp_sz > 0){
13140  if (temp_sz > 2){
13141  if (temp_sz > 3){
13142  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13143  }
13144  typesz = sizeof(int) * params->buffers_size[0];
13145  typesz2 = sizeof(int) * params->buffers_size[1];
13146  typesz3 = sizeof(char) * params->buffers_size[2];
13147  }
13148  else{
13149  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13150  }
13151  }
13152  cl_int error = 0;
13153  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13154  checkError(error);
13155  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13156  checkError(error);
13157  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13158  checkError(error);
13159  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13160  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13161  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13162  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13163 
13164  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
13165  checkError (error);
13166  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13167  checkError (error);
13168  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13169  checkError (error);
13170 
13171  size_t size[3] = {sz, sz2, sz3};
13172  size_t work_dimension = 3;
13173 
13174  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13175  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13176  work_dimension = 1;
13177  }
13178  else if(temp_sz > 0){
13179  if (params->multi_dimensional){
13180  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13181  For default multidimensional global work size, leave the global_work_size vector empty, \
13182  and set multi_dimensional to true. Setting the global work size based on the values inside \
13183  the global_work_size vector.");
13184  }
13185  if (temp_sz == 1){
13186  size[0] = params->global_work_size[0];
13187  work_dimension = 1;
13188  }
13189  else if (temp_sz == 2){
13190  size[0] = params->global_work_size[0];
13191  size[1] = params->global_work_size[1];
13192  work_dimension = 2;
13193  }
13194  else{
13195  size[0] = params->global_work_size[0];
13196  size[1] = params->global_work_size[1];
13197  size[2] = params->global_work_size[2];
13198  if (temp_sz > 3){
13199  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13200  }
13201  }
13202  }
13203 
13204  cl_event gpuExec;
13205 
13206  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13207 
13208  clWaitForEvents(1, &gpuExec);
13209 
13210  int *result = (int *) malloc(typesz);
13211  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13212 
13213  std::vector<int> res = std::vector<int>();
13214  res.assign(result, result+sz);
13215 
13216  clReleaseCommandQueue (queue);
13217  clReleaseMemObject(buffer);
13218  clReleaseMemObject(buffer2);
13219  clReleaseMemObject(buffer3);
13220  clReleaseEvent(gpuExec);
13221  free(result);
13222 
13223  return res;
13224  }
13225 
13226  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
13227  size_t sz = v->size();
13228  size_t sz2 = v2.size();
13229  size_t sz3 = v3.size();
13230  size_t typesz = sizeof(int) * sz;
13231  size_t typesz2 = sizeof(int) * sz2;
13232  size_t typesz3 = sizeof(char) * sz3;
13233  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13234  if (temp_sz > 0){
13235  if (temp_sz > 2){
13236  if (temp_sz > 3){
13237  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13238  }
13239  typesz = sizeof(int) * params->buffers_size[0];
13240  typesz2 = sizeof(int) * params->buffers_size[1];
13241  typesz3 = sizeof(char) * params->buffers_size[2];
13242  }
13243  else{
13244  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13245  }
13246  }
13247  cl_int error = 0;
13248  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13249  checkError(error);
13250  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13251  checkError(error);
13252  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13253  checkError(error);
13254  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13255  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13256  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13257  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13258 
13259  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13260  checkError (error);
13261  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13262  checkError (error);
13263  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13264  checkError (error);
13265 
13266  size_t size[3] = {sz, sz2, sz3};
13267  size_t work_dimension = 3;
13268 
13269  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13270  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13271  work_dimension = 1;
13272  }
13273  else if(temp_sz > 0){
13274  if (params->multi_dimensional){
13275  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13276  For default multidimensional global work size, leave the global_work_size vector empty, \
13277  and set multi_dimensional to true. Setting the global work size based on the values inside \
13278  the global_work_size vector.");
13279  }
13280  if (temp_sz == 1){
13281  size[0] = params->global_work_size[0];
13282  work_dimension = 1;
13283  }
13284  else if (temp_sz == 2){
13285  size[0] = params->global_work_size[0];
13286  size[1] = params->global_work_size[1];
13287  work_dimension = 2;
13288  }
13289  else{
13290  size[0] = params->global_work_size[0];
13291  size[1] = params->global_work_size[1];
13292  size[2] = params->global_work_size[2];
13293  if (temp_sz > 3){
13294  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13295  }
13296  }
13297  }
13298 
13299  cl_event gpuExec;
13300 
13301  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13302 
13303  clWaitForEvents(1, &gpuExec);
13304 
13305  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13306 
13307  clWaitForEvents(1, &gpuExec);
13308 
13309  int *result = (int *) malloc(typesz);
13310  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13311 
13312  v->assign(result, result+sz);
13313 
13314  clReleaseCommandQueue (queue);
13315  clReleaseMemObject(buffer);
13316  clReleaseMemObject(buffer2);
13317  clReleaseMemObject(buffer3);
13318  clReleaseEvent(gpuExec);
13319  free(result);
13320  }
13321 
13322  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
13323  size_t sz = v->size();
13324  size_t sz2 = v2->size();
13325  size_t sz3 = v3.size();
13326  size_t typesz = sizeof(int) * sz;
13327  size_t typesz2 = sizeof(int) * sz2;
13328  size_t typesz3 = sizeof(char) * sz3;
13329  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13330  if (temp_sz > 0){
13331  if (temp_sz > 2){
13332  if (temp_sz > 3){
13333  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13334  }
13335  typesz = sizeof(int) * params->buffers_size[0];
13336  typesz2 = sizeof(int) * params->buffers_size[1];
13337  typesz3 = sizeof(char) * params->buffers_size[2];
13338  }
13339  else{
13340  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13341  }
13342  }
13343  cl_int error = 0;
13344  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13345  checkError(error);
13346  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13347  checkError(error);
13348  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13349  checkError(error);
13350  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13351  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13352  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13353  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13354 
13355  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13356  checkError (error);
13357  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13358  checkError (error);
13359  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13360  checkError (error);
13361 
13362  size_t size[3] = {sz, sz2, sz3};
13363  size_t work_dimension = 3;
13364 
13365  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13366  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13367  work_dimension = 1;
13368  }
13369  else if(temp_sz > 0){
13370  if (params->multi_dimensional){
13371  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13372  For default multidimensional global work size, leave the global_work_size vector empty, \
13373  and set multi_dimensional to true. Setting the global work size based on the values inside \
13374  the global_work_size vector.");
13375  }
13376  if (temp_sz == 1){
13377  size[0] = params->global_work_size[0];
13378  work_dimension = 1;
13379  }
13380  else if (temp_sz == 2){
13381  size[0] = params->global_work_size[0];
13382  size[1] = params->global_work_size[1];
13383  work_dimension = 2;
13384  }
13385  else{
13386  size[0] = params->global_work_size[0];
13387  size[1] = params->global_work_size[1];
13388  size[2] = params->global_work_size[2];
13389  if (temp_sz > 3){
13390  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13391  }
13392  }
13393  }
13394 
13395  cl_event gpuExec;
13396 
13397  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13398 
13399  clWaitForEvents(1, &gpuExec);
13400 
13401  int *result = (int *) malloc(typesz);
13402  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13403 
13404  v->assign(result, result+sz);
13405 
13406  if (typesz2 != typesz or sz != sz2){
13407  int *result2;
13408  result2 = (int *) malloc(typesz2);
13409  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13410 
13411  v2->assign(result2, result2+sz2);
13412  free(result2);
13413  }
13414  else{
13415  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13416 
13417  v2->assign(result, result+sz2);
13418  }
13419 
13420  clReleaseCommandQueue (queue);
13421  clReleaseMemObject(buffer);
13422  clReleaseMemObject(buffer2);
13423  clReleaseMemObject(buffer3);
13424  clReleaseEvent(gpuExec);
13425  free(result);
13426  }
13427 
13428  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
13429  size_t sz = v->size();
13430  size_t sz2 = v2->size();
13431  size_t sz3 = v3->size();
13432  size_t typesz = sizeof(int) * sz;
13433  size_t typesz2 = sizeof(int) * sz2;
13434  size_t typesz3 = sizeof(char) * sz3;
13435  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13436  if (temp_sz > 0){
13437  if (temp_sz > 2){
13438  if (temp_sz > 3){
13439  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13440  }
13441  typesz = sizeof(int) * params->buffers_size[0];
13442  typesz2 = sizeof(int) * params->buffers_size[1];
13443  typesz3 = sizeof(char) * params->buffers_size[2];
13444  }
13445  else{
13446  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13447  }
13448  }
13449  cl_int error = 0;
13450  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13451  checkError(error);
13452  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13453  checkError(error);
13454  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
13455  checkError(error);
13456  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13457  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13458  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13459  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13460 
13461  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13462  checkError (error);
13463  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13464  checkError (error);
13465  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
13466  checkError (error);
13467 
13468  size_t size[3] = {sz, sz2, sz3};
13469  size_t work_dimension = 3;
13470 
13471  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13472  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13473  work_dimension = 1;
13474  }
13475  else if(temp_sz > 0){
13476  if (params->multi_dimensional){
13477  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13478  For default multidimensional global work size, leave the global_work_size vector empty, \
13479  and set multi_dimensional to true. Setting the global work size based on the values inside \
13480  the global_work_size vector.");
13481  }
13482  if (temp_sz == 1){
13483  size[0] = params->global_work_size[0];
13484  work_dimension = 1;
13485  }
13486  else if (temp_sz == 2){
13487  size[0] = params->global_work_size[0];
13488  size[1] = params->global_work_size[1];
13489  work_dimension = 2;
13490  }
13491  else{
13492  size[0] = params->global_work_size[0];
13493  size[1] = params->global_work_size[1];
13494  size[2] = params->global_work_size[2];
13495  if (temp_sz > 3){
13496  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13497  }
13498  }
13499  }
13500 
13501  cl_event gpuExec;
13502 
13503  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13504 
13505  clWaitForEvents(1, &gpuExec);
13506 
13507  int *result = (int *) malloc(typesz);
13508  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13509 
13510  v->assign(result, result+sz);
13511 
13512  if (typesz2 != typesz or sz != sz2){
13513  int *result2;
13514  result2 = (int *) malloc(typesz2);
13515  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13516 
13517  v2->assign(result2, result2+sz2);
13518  free(result2);
13519  }
13520  else{
13521  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13522 
13523  v2->assign(result, result+sz2);
13524  }
13525 
13526  if (typesz3 != typesz or sz != sz3){
13527  char *result3;
13528  result3 = (char *) malloc(typesz3);
13529  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13530 
13531  v3->assign(result3, result3+sz3);
13532  free(result3);
13533  }
13534  else{
13535  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13536 
13537  v3->assign(result, result+sz3);
13538  }
13539 
13540  clReleaseCommandQueue (queue);
13541  clReleaseMemObject(buffer);
13542  clReleaseMemObject(buffer2);
13543  clReleaseMemObject(buffer3);
13544  clReleaseEvent(gpuExec);
13545  free(result);
13546  }
13547 
13548 
13549  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
13550  size_t sz = v.size();
13551  size_t sz2 = v2.size();
13552  size_t sz3 = v3.size();
13553  size_t typesz = sizeof(int) * sz;
13554  size_t typesz2 = sizeof(int) * sz2;
13555  size_t typesz3 = sizeof(int) * sz3;
13556  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13557  if (temp_sz > 0){
13558  if (temp_sz > 2){
13559  if (temp_sz > 3){
13560  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13561  }
13562  typesz = sizeof(int) * params->buffers_size[0];
13563  typesz2 = sizeof(int) * params->buffers_size[1];
13564  typesz3 = sizeof(int) * params->buffers_size[2];
13565  }
13566  else{
13567  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13568  }
13569  }
13570  cl_int error = 0;
13571  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13572  checkError(error);
13573  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13574  checkError(error);
13575  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13576  checkError(error);
13577  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13578  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13579  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13580  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13581 
13582  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
13583  checkError (error);
13584  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13585  checkError (error);
13586  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13587  checkError (error);
13588 
13589  size_t size[3] = {sz, sz2, sz3};
13590  size_t work_dimension = 3;
13591 
13592  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13593  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13594  work_dimension = 1;
13595  }
13596  else if(temp_sz > 0){
13597  if (params->multi_dimensional){
13598  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13599  For default multidimensional global work size, leave the global_work_size vector empty, \
13600  and set multi_dimensional to true. Setting the global work size based on the values inside \
13601  the global_work_size vector.");
13602  }
13603  if (temp_sz == 1){
13604  size[0] = params->global_work_size[0];
13605  work_dimension = 1;
13606  }
13607  else if (temp_sz == 2){
13608  size[0] = params->global_work_size[0];
13609  size[1] = params->global_work_size[1];
13610  work_dimension = 2;
13611  }
13612  else{
13613  size[0] = params->global_work_size[0];
13614  size[1] = params->global_work_size[1];
13615  size[2] = params->global_work_size[2];
13616  if (temp_sz > 3){
13617  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13618  }
13619  }
13620  }
13621 
13622  cl_event gpuExec;
13623 
13624  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13625 
13626  clWaitForEvents(1, &gpuExec);
13627 
13628  int *result = (int *) malloc(typesz);
13629  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13630 
13631  std::vector<int> res = std::vector<int>();
13632  res.assign(result, result+sz);
13633 
13634  clReleaseCommandQueue (queue);
13635  clReleaseMemObject(buffer);
13636  clReleaseMemObject(buffer2);
13637  clReleaseMemObject(buffer3);
13638  clReleaseEvent(gpuExec);
13639  free(result);
13640 
13641  return res;
13642  }
13643 
13644  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
13645  size_t sz = v->size();
13646  size_t sz2 = v2.size();
13647  size_t sz3 = v3.size();
13648  size_t typesz = sizeof(int) * sz;
13649  size_t typesz2 = sizeof(int) * sz2;
13650  size_t typesz3 = sizeof(int) * sz3;
13651  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13652  if (temp_sz > 0){
13653  if (temp_sz > 2){
13654  if (temp_sz > 3){
13655  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13656  }
13657  typesz = sizeof(int) * params->buffers_size[0];
13658  typesz2 = sizeof(int) * params->buffers_size[1];
13659  typesz3 = sizeof(int) * params->buffers_size[2];
13660  }
13661  else{
13662  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13663  }
13664  }
13665  cl_int error = 0;
13666  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13667  checkError(error);
13668  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13669  checkError(error);
13670  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13671  checkError(error);
13672  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13673  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13674  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13675  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13676 
13677  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13678  checkError (error);
13679  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13680  checkError (error);
13681  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13682  checkError (error);
13683 
13684  size_t size[3] = {sz, sz2, sz3};
13685  size_t work_dimension = 3;
13686 
13687  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13688  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13689  work_dimension = 1;
13690  }
13691  else if(temp_sz > 0){
13692  if (params->multi_dimensional){
13693  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13694  For default multidimensional global work size, leave the global_work_size vector empty, \
13695  and set multi_dimensional to true. Setting the global work size based on the values inside \
13696  the global_work_size vector.");
13697  }
13698  if (temp_sz == 1){
13699  size[0] = params->global_work_size[0];
13700  work_dimension = 1;
13701  }
13702  else if (temp_sz == 2){
13703  size[0] = params->global_work_size[0];
13704  size[1] = params->global_work_size[1];
13705  work_dimension = 2;
13706  }
13707  else{
13708  size[0] = params->global_work_size[0];
13709  size[1] = params->global_work_size[1];
13710  size[2] = params->global_work_size[2];
13711  if (temp_sz > 3){
13712  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13713  }
13714  }
13715  }
13716 
13717  cl_event gpuExec;
13718 
13719  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13720 
13721  clWaitForEvents(1, &gpuExec);
13722 
13723  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13724 
13725  clWaitForEvents(1, &gpuExec);
13726 
13727  int *result = (int *) malloc(typesz);
13728  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13729 
13730  v->assign(result, result+sz);
13731 
13732  clReleaseCommandQueue (queue);
13733  clReleaseMemObject(buffer);
13734  clReleaseMemObject(buffer2);
13735  clReleaseMemObject(buffer3);
13736  clReleaseEvent(gpuExec);
13737  free(result);
13738  }
13739 
13740  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
13741  size_t sz = v->size();
13742  size_t sz2 = v2->size();
13743  size_t sz3 = v3.size();
13744  size_t typesz = sizeof(int) * sz;
13745  size_t typesz2 = sizeof(int) * sz2;
13746  size_t typesz3 = sizeof(int) * sz3;
13747  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13748  if (temp_sz > 0){
13749  if (temp_sz > 2){
13750  if (temp_sz > 3){
13751  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13752  }
13753  typesz = sizeof(int) * params->buffers_size[0];
13754  typesz2 = sizeof(int) * params->buffers_size[1];
13755  typesz3 = sizeof(int) * params->buffers_size[2];
13756  }
13757  else{
13758  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13759  }
13760  }
13761  cl_int error = 0;
13762  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13763  checkError(error);
13764  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13765  checkError(error);
13766  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13767  checkError(error);
13768  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13769  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13770  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13771  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13772 
13773  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13774  checkError (error);
13775  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13776  checkError (error);
13777  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
13778  checkError (error);
13779 
13780  size_t size[3] = {sz, sz2, sz3};
13781  size_t work_dimension = 3;
13782 
13783  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13784  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13785  work_dimension = 1;
13786  }
13787  else if(temp_sz > 0){
13788  if (params->multi_dimensional){
13789  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13790  For default multidimensional global work size, leave the global_work_size vector empty, \
13791  and set multi_dimensional to true. Setting the global work size based on the values inside \
13792  the global_work_size vector.");
13793  }
13794  if (temp_sz == 1){
13795  size[0] = params->global_work_size[0];
13796  work_dimension = 1;
13797  }
13798  else if (temp_sz == 2){
13799  size[0] = params->global_work_size[0];
13800  size[1] = params->global_work_size[1];
13801  work_dimension = 2;
13802  }
13803  else{
13804  size[0] = params->global_work_size[0];
13805  size[1] = params->global_work_size[1];
13806  size[2] = params->global_work_size[2];
13807  if (temp_sz > 3){
13808  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13809  }
13810  }
13811  }
13812 
13813  cl_event gpuExec;
13814 
13815  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13816 
13817  clWaitForEvents(1, &gpuExec);
13818 
13819  int *result = (int *) malloc(typesz);
13820  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13821 
13822  v->assign(result, result+sz);
13823 
13824  if (typesz2 != typesz or sz != sz2){
13825  int *result2;
13826  result2 = (int *) malloc(typesz2);
13827  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13828 
13829  v2->assign(result2, result2+sz2);
13830  free(result2);
13831  }
13832  else{
13833  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13834 
13835  v2->assign(result, result+sz2);
13836  }
13837 
13838  clReleaseCommandQueue (queue);
13839  clReleaseMemObject(buffer);
13840  clReleaseMemObject(buffer2);
13841  clReleaseMemObject(buffer3);
13842  clReleaseEvent(gpuExec);
13843  free(result);
13844  }
13845 
13846  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
13847  size_t sz = v->size();
13848  size_t sz2 = v2->size();
13849  size_t sz3 = v3->size();
13850  size_t typesz = sizeof(int) * sz;
13851  size_t typesz2 = sizeof(int) * sz2;
13852  size_t typesz3 = sizeof(int) * sz3;
13853  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13854  if (temp_sz > 0){
13855  if (temp_sz > 2){
13856  if (temp_sz > 3){
13857  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13858  }
13859  typesz = sizeof(int) * params->buffers_size[0];
13860  typesz2 = sizeof(int) * params->buffers_size[1];
13861  typesz3 = sizeof(int) * params->buffers_size[2];
13862  }
13863  else{
13864  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13865  }
13866  }
13867  cl_int error = 0;
13868  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13869  checkError(error);
13870  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13871  checkError(error);
13872  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
13873  checkError(error);
13874  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13875  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13876  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13877  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13878 
13879  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13880  checkError (error);
13881  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13882  checkError (error);
13883  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
13884  checkError (error);
13885 
13886  size_t size[3] = {sz, sz2, sz3};
13887  size_t work_dimension = 3;
13888 
13889  temp_sz = params != NULL ? params->global_work_size.size() : 0;
13890  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
13891  work_dimension = 1;
13892  }
13893  else if(temp_sz > 0){
13894  if (params->multi_dimensional){
13895  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
13896  For default multidimensional global work size, leave the global_work_size vector empty, \
13897  and set multi_dimensional to true. Setting the global work size based on the values inside \
13898  the global_work_size vector.");
13899  }
13900  if (temp_sz == 1){
13901  size[0] = params->global_work_size[0];
13902  work_dimension = 1;
13903  }
13904  else if (temp_sz == 2){
13905  size[0] = params->global_work_size[0];
13906  size[1] = params->global_work_size[1];
13907  work_dimension = 2;
13908  }
13909  else{
13910  size[0] = params->global_work_size[0];
13911  size[1] = params->global_work_size[1];
13912  size[2] = params->global_work_size[2];
13913  if (temp_sz > 3){
13914  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
13915  }
13916  }
13917  }
13918 
13919  cl_event gpuExec;
13920 
13921  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13922 
13923  clWaitForEvents(1, &gpuExec);
13924 
13925  int *result = (int *) malloc(typesz);
13926  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13927 
13928  v->assign(result, result+sz);
13929 
13930  if (typesz2 != typesz or sz != sz2){
13931  int *result2;
13932  result2 = (int *) malloc(typesz2);
13933  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13934 
13935  v2->assign(result2, result2+sz2);
13936  free(result2);
13937  }
13938  else{
13939  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13940 
13941  v2->assign(result, result+sz2);
13942  }
13943 
13944  if (typesz3 != typesz or sz != sz3){
13945  int *result3;
13946  result3 = (int *) malloc(typesz3);
13947  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13948 
13949  v3->assign(result3, result3+sz3);
13950  free(result3);
13951  }
13952  else{
13953  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13954 
13955  v3->assign(result, result+sz3);
13956  }
13957 
13958  clReleaseCommandQueue (queue);
13959  clReleaseMemObject(buffer);
13960  clReleaseMemObject(buffer2);
13961  clReleaseMemObject(buffer3);
13962  clReleaseEvent(gpuExec);
13963  free(result);
13964  }
13965 
13966 
13967  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
13968  size_t sz = v.size();
13969  size_t sz2 = v2.size();
13970  size_t sz3 = v3.size();
13971  size_t typesz = sizeof(int) * sz;
13972  size_t typesz2 = sizeof(int) * sz2;
13973  size_t typesz3 = sizeof(float) * sz3;
13974  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
13975  if (temp_sz > 0){
13976  if (temp_sz > 2){
13977  if (temp_sz > 3){
13978  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13979  }
13980  typesz = sizeof(int) * params->buffers_size[0];
13981  typesz2 = sizeof(int) * params->buffers_size[1];
13982  typesz3 = sizeof(float) * params->buffers_size[2];
13983  }
13984  else{
13985  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
13986  }
13987  }
13988  cl_int error = 0;
13989  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13990  checkError(error);
13991  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13992  checkError(error);
13993  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
13994  checkError(error);
13995  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
13996  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
13997  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
13998  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
13999 
14000  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14001  checkError (error);
14002  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14003  checkError (error);
14004  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14005  checkError (error);
14006 
14007  size_t size[3] = {sz, sz2, sz3};
14008  size_t work_dimension = 3;
14009 
14010  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14011  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14012  work_dimension = 1;
14013  }
14014  else if(temp_sz > 0){
14015  if (params->multi_dimensional){
14016  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14017  For default multidimensional global work size, leave the global_work_size vector empty, \
14018  and set multi_dimensional to true. Setting the global work size based on the values inside \
14019  the global_work_size vector.");
14020  }
14021  if (temp_sz == 1){
14022  size[0] = params->global_work_size[0];
14023  work_dimension = 1;
14024  }
14025  else if (temp_sz == 2){
14026  size[0] = params->global_work_size[0];
14027  size[1] = params->global_work_size[1];
14028  work_dimension = 2;
14029  }
14030  else{
14031  size[0] = params->global_work_size[0];
14032  size[1] = params->global_work_size[1];
14033  size[2] = params->global_work_size[2];
14034  if (temp_sz > 3){
14035  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14036  }
14037  }
14038  }
14039 
14040  cl_event gpuExec;
14041 
14042  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14043 
14044  clWaitForEvents(1, &gpuExec);
14045 
14046  int *result = (int *) malloc(typesz);
14047  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14048 
14049  std::vector<int> res = std::vector<int>();
14050  res.assign(result, result+sz);
14051 
14052  clReleaseCommandQueue (queue);
14053  clReleaseMemObject(buffer);
14054  clReleaseMemObject(buffer2);
14055  clReleaseMemObject(buffer3);
14056  clReleaseEvent(gpuExec);
14057  free(result);
14058 
14059  return res;
14060  }
14061 
14062  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
14063  size_t sz = v->size();
14064  size_t sz2 = v2.size();
14065  size_t sz3 = v3.size();
14066  size_t typesz = sizeof(int) * sz;
14067  size_t typesz2 = sizeof(int) * sz2;
14068  size_t typesz3 = sizeof(float) * sz3;
14069  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14070  if (temp_sz > 0){
14071  if (temp_sz > 2){
14072  if (temp_sz > 3){
14073  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14074  }
14075  typesz = sizeof(int) * params->buffers_size[0];
14076  typesz2 = sizeof(int) * params->buffers_size[1];
14077  typesz3 = sizeof(float) * params->buffers_size[2];
14078  }
14079  else{
14080  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14081  }
14082  }
14083  cl_int error = 0;
14084  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14085  checkError(error);
14086  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14087  checkError(error);
14088  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14089  checkError(error);
14090  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14091  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14092  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14093  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14094 
14095  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14096  checkError (error);
14097  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14098  checkError (error);
14099  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14100  checkError (error);
14101 
14102  size_t size[3] = {sz, sz2, sz3};
14103  size_t work_dimension = 3;
14104 
14105  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14106  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14107  work_dimension = 1;
14108  }
14109  else if(temp_sz > 0){
14110  if (params->multi_dimensional){
14111  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14112  For default multidimensional global work size, leave the global_work_size vector empty, \
14113  and set multi_dimensional to true. Setting the global work size based on the values inside \
14114  the global_work_size vector.");
14115  }
14116  if (temp_sz == 1){
14117  size[0] = params->global_work_size[0];
14118  work_dimension = 1;
14119  }
14120  else if (temp_sz == 2){
14121  size[0] = params->global_work_size[0];
14122  size[1] = params->global_work_size[1];
14123  work_dimension = 2;
14124  }
14125  else{
14126  size[0] = params->global_work_size[0];
14127  size[1] = params->global_work_size[1];
14128  size[2] = params->global_work_size[2];
14129  if (temp_sz > 3){
14130  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14131  }
14132  }
14133  }
14134 
14135  cl_event gpuExec;
14136 
14137  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14138 
14139  clWaitForEvents(1, &gpuExec);
14140 
14141  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14142 
14143  clWaitForEvents(1, &gpuExec);
14144 
14145  int *result = (int *) malloc(typesz);
14146  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14147 
14148  v->assign(result, result+sz);
14149 
14150  clReleaseCommandQueue (queue);
14151  clReleaseMemObject(buffer);
14152  clReleaseMemObject(buffer2);
14153  clReleaseMemObject(buffer3);
14154  clReleaseEvent(gpuExec);
14155  free(result);
14156  }
14157 
14158  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
14159  size_t sz = v->size();
14160  size_t sz2 = v2->size();
14161  size_t sz3 = v3.size();
14162  size_t typesz = sizeof(int) * sz;
14163  size_t typesz2 = sizeof(int) * sz2;
14164  size_t typesz3 = sizeof(float) * sz3;
14165  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14166  if (temp_sz > 0){
14167  if (temp_sz > 2){
14168  if (temp_sz > 3){
14169  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14170  }
14171  typesz = sizeof(int) * params->buffers_size[0];
14172  typesz2 = sizeof(int) * params->buffers_size[1];
14173  typesz3 = sizeof(float) * params->buffers_size[2];
14174  }
14175  else{
14176  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14177  }
14178  }
14179  cl_int error = 0;
14180  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14181  checkError(error);
14182  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14183  checkError(error);
14184  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14185  checkError(error);
14186  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14187  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14188  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14189  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14190 
14191  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14192  checkError (error);
14193  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14194  checkError (error);
14195  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14196  checkError (error);
14197 
14198  size_t size[3] = {sz, sz2, sz3};
14199  size_t work_dimension = 3;
14200 
14201  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14202  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14203  work_dimension = 1;
14204  }
14205  else if(temp_sz > 0){
14206  if (params->multi_dimensional){
14207  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14208  For default multidimensional global work size, leave the global_work_size vector empty, \
14209  and set multi_dimensional to true. Setting the global work size based on the values inside \
14210  the global_work_size vector.");
14211  }
14212  if (temp_sz == 1){
14213  size[0] = params->global_work_size[0];
14214  work_dimension = 1;
14215  }
14216  else if (temp_sz == 2){
14217  size[0] = params->global_work_size[0];
14218  size[1] = params->global_work_size[1];
14219  work_dimension = 2;
14220  }
14221  else{
14222  size[0] = params->global_work_size[0];
14223  size[1] = params->global_work_size[1];
14224  size[2] = params->global_work_size[2];
14225  if (temp_sz > 3){
14226  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14227  }
14228  }
14229  }
14230 
14231  cl_event gpuExec;
14232 
14233  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14234 
14235  clWaitForEvents(1, &gpuExec);
14236 
14237  int *result = (int *) malloc(typesz);
14238  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14239 
14240  v->assign(result, result+sz);
14241 
14242  if (typesz2 != typesz or sz != sz2){
14243  int *result2;
14244  result2 = (int *) malloc(typesz2);
14245  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14246 
14247  v2->assign(result2, result2+sz2);
14248  free(result2);
14249  }
14250  else{
14251  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14252 
14253  v2->assign(result, result+sz2);
14254  }
14255 
14256  clReleaseCommandQueue (queue);
14257  clReleaseMemObject(buffer);
14258  clReleaseMemObject(buffer2);
14259  clReleaseMemObject(buffer3);
14260  clReleaseEvent(gpuExec);
14261  free(result);
14262  }
14263 
14264  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
14265  size_t sz = v->size();
14266  size_t sz2 = v2->size();
14267  size_t sz3 = v3->size();
14268  size_t typesz = sizeof(int) * sz;
14269  size_t typesz2 = sizeof(int) * sz2;
14270  size_t typesz3 = sizeof(float) * sz3;
14271  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14272  if (temp_sz > 0){
14273  if (temp_sz > 2){
14274  if (temp_sz > 3){
14275  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14276  }
14277  typesz = sizeof(int) * params->buffers_size[0];
14278  typesz2 = sizeof(int) * params->buffers_size[1];
14279  typesz3 = sizeof(float) * params->buffers_size[2];
14280  }
14281  else{
14282  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14283  }
14284  }
14285  cl_int error = 0;
14286  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14287  checkError(error);
14288  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14289  checkError(error);
14290  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
14291  checkError(error);
14292  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14293  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14294  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14295  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14296 
14297  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14298  checkError (error);
14299  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14300  checkError (error);
14301  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
14302  checkError (error);
14303 
14304  size_t size[3] = {sz, sz2, sz3};
14305  size_t work_dimension = 3;
14306 
14307  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14308  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14309  work_dimension = 1;
14310  }
14311  else if(temp_sz > 0){
14312  if (params->multi_dimensional){
14313  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14314  For default multidimensional global work size, leave the global_work_size vector empty, \
14315  and set multi_dimensional to true. Setting the global work size based on the values inside \
14316  the global_work_size vector.");
14317  }
14318  if (temp_sz == 1){
14319  size[0] = params->global_work_size[0];
14320  work_dimension = 1;
14321  }
14322  else if (temp_sz == 2){
14323  size[0] = params->global_work_size[0];
14324  size[1] = params->global_work_size[1];
14325  work_dimension = 2;
14326  }
14327  else{
14328  size[0] = params->global_work_size[0];
14329  size[1] = params->global_work_size[1];
14330  size[2] = params->global_work_size[2];
14331  if (temp_sz > 3){
14332  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14333  }
14334  }
14335  }
14336 
14337  cl_event gpuExec;
14338 
14339  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14340 
14341  clWaitForEvents(1, &gpuExec);
14342 
14343  int *result = (int *) malloc(typesz);
14344  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14345 
14346  v->assign(result, result+sz);
14347 
14348  if (typesz2 != typesz or sz != sz2){
14349  int *result2;
14350  result2 = (int *) malloc(typesz2);
14351  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14352 
14353  v2->assign(result2, result2+sz2);
14354  free(result2);
14355  }
14356  else{
14357  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14358 
14359  v2->assign(result, result+sz2);
14360  }
14361 
14362  if (typesz3 != typesz or sz != sz3){
14363  float *result3;
14364  result3 = (float *) malloc(typesz3);
14365  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
14366 
14367  v3->assign(result3, result3+sz3);
14368  free(result3);
14369  }
14370  else{
14371  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14372 
14373  v3->assign(result, result+sz3);
14374  }
14375 
14376  clReleaseCommandQueue (queue);
14377  clReleaseMemObject(buffer);
14378  clReleaseMemObject(buffer2);
14379  clReleaseMemObject(buffer3);
14380  clReleaseEvent(gpuExec);
14381  free(result);
14382  }
14383 
14384 
14385  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
14386  size_t sz = v.size();
14387  size_t sz2 = v2.size();
14388  size_t sz3 = v3.size();
14389  size_t typesz = sizeof(int) * sz;
14390  size_t typesz2 = sizeof(int) * sz2;
14391  size_t typesz3 = sizeof(double) * sz3;
14392  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14393  if (temp_sz > 0){
14394  if (temp_sz > 2){
14395  if (temp_sz > 3){
14396  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14397  }
14398  typesz = sizeof(int) * params->buffers_size[0];
14399  typesz2 = sizeof(int) * params->buffers_size[1];
14400  typesz3 = sizeof(double) * params->buffers_size[2];
14401  }
14402  else{
14403  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14404  }
14405  }
14406  cl_int error = 0;
14407  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14408  checkError(error);
14409  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14410  checkError(error);
14411  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14412  checkError(error);
14413  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14414  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14415  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14416  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14417 
14418  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14419  checkError (error);
14420  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14421  checkError (error);
14422  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14423  checkError (error);
14424 
14425  size_t size[3] = {sz, sz2, sz3};
14426  size_t work_dimension = 3;
14427 
14428  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14429  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14430  work_dimension = 1;
14431  }
14432  else if(temp_sz > 0){
14433  if (params->multi_dimensional){
14434  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14435  For default multidimensional global work size, leave the global_work_size vector empty, \
14436  and set multi_dimensional to true. Setting the global work size based on the values inside \
14437  the global_work_size vector.");
14438  }
14439  if (temp_sz == 1){
14440  size[0] = params->global_work_size[0];
14441  work_dimension = 1;
14442  }
14443  else if (temp_sz == 2){
14444  size[0] = params->global_work_size[0];
14445  size[1] = params->global_work_size[1];
14446  work_dimension = 2;
14447  }
14448  else{
14449  size[0] = params->global_work_size[0];
14450  size[1] = params->global_work_size[1];
14451  size[2] = params->global_work_size[2];
14452  if (temp_sz > 3){
14453  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14454  }
14455  }
14456  }
14457 
14458  cl_event gpuExec;
14459 
14460  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14461 
14462  clWaitForEvents(1, &gpuExec);
14463 
14464  int *result = (int *) malloc(typesz);
14465  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14466 
14467  std::vector<int> res = std::vector<int>();
14468  res.assign(result, result+sz);
14469 
14470  clReleaseCommandQueue (queue);
14471  clReleaseMemObject(buffer);
14472  clReleaseMemObject(buffer2);
14473  clReleaseMemObject(buffer3);
14474  clReleaseEvent(gpuExec);
14475  free(result);
14476 
14477  return res;
14478  }
14479 
14480  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
14481  size_t sz = v->size();
14482  size_t sz2 = v2.size();
14483  size_t sz3 = v3.size();
14484  size_t typesz = sizeof(int) * sz;
14485  size_t typesz2 = sizeof(int) * sz2;
14486  size_t typesz3 = sizeof(double) * sz3;
14487  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14488  if (temp_sz > 0){
14489  if (temp_sz > 2){
14490  if (temp_sz > 3){
14491  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14492  }
14493  typesz = sizeof(int) * params->buffers_size[0];
14494  typesz2 = sizeof(int) * params->buffers_size[1];
14495  typesz3 = sizeof(double) * params->buffers_size[2];
14496  }
14497  else{
14498  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14499  }
14500  }
14501  cl_int error = 0;
14502  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14503  checkError(error);
14504  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14505  checkError(error);
14506  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14507  checkError(error);
14508  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14509  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14510  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14511  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14512 
14513  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14514  checkError (error);
14515  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14516  checkError (error);
14517  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14518  checkError (error);
14519 
14520  size_t size[3] = {sz, sz2, sz3};
14521  size_t work_dimension = 3;
14522 
14523  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14524  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14525  work_dimension = 1;
14526  }
14527  else if(temp_sz > 0){
14528  if (params->multi_dimensional){
14529  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14530  For default multidimensional global work size, leave the global_work_size vector empty, \
14531  and set multi_dimensional to true. Setting the global work size based on the values inside \
14532  the global_work_size vector.");
14533  }
14534  if (temp_sz == 1){
14535  size[0] = params->global_work_size[0];
14536  work_dimension = 1;
14537  }
14538  else if (temp_sz == 2){
14539  size[0] = params->global_work_size[0];
14540  size[1] = params->global_work_size[1];
14541  work_dimension = 2;
14542  }
14543  else{
14544  size[0] = params->global_work_size[0];
14545  size[1] = params->global_work_size[1];
14546  size[2] = params->global_work_size[2];
14547  if (temp_sz > 3){
14548  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14549  }
14550  }
14551  }
14552 
14553  cl_event gpuExec;
14554 
14555  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14556 
14557  clWaitForEvents(1, &gpuExec);
14558 
14559  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14560 
14561  clWaitForEvents(1, &gpuExec);
14562 
14563  int *result = (int *) malloc(typesz);
14564  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14565 
14566  v->assign(result, result+sz);
14567 
14568  clReleaseCommandQueue (queue);
14569  clReleaseMemObject(buffer);
14570  clReleaseMemObject(buffer2);
14571  clReleaseMemObject(buffer3);
14572  clReleaseEvent(gpuExec);
14573  free(result);
14574  }
14575 
14576  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
14577  size_t sz = v->size();
14578  size_t sz2 = v2->size();
14579  size_t sz3 = v3.size();
14580  size_t typesz = sizeof(int) * sz;
14581  size_t typesz2 = sizeof(int) * sz2;
14582  size_t typesz3 = sizeof(double) * sz3;
14583  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14584  if (temp_sz > 0){
14585  if (temp_sz > 2){
14586  if (temp_sz > 3){
14587  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14588  }
14589  typesz = sizeof(int) * params->buffers_size[0];
14590  typesz2 = sizeof(int) * params->buffers_size[1];
14591  typesz3 = sizeof(double) * params->buffers_size[2];
14592  }
14593  else{
14594  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14595  }
14596  }
14597  cl_int error = 0;
14598  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14599  checkError(error);
14600  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14601  checkError(error);
14602  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14603  checkError(error);
14604  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14605  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14606  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14607  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14608 
14609  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14610  checkError (error);
14611  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14612  checkError (error);
14613  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14614  checkError (error);
14615 
14616  size_t size[3] = {sz, sz2, sz3};
14617  size_t work_dimension = 3;
14618 
14619  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14620  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14621  work_dimension = 1;
14622  }
14623  else if(temp_sz > 0){
14624  if (params->multi_dimensional){
14625  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14626  For default multidimensional global work size, leave the global_work_size vector empty, \
14627  and set multi_dimensional to true. Setting the global work size based on the values inside \
14628  the global_work_size vector.");
14629  }
14630  if (temp_sz == 1){
14631  size[0] = params->global_work_size[0];
14632  work_dimension = 1;
14633  }
14634  else if (temp_sz == 2){
14635  size[0] = params->global_work_size[0];
14636  size[1] = params->global_work_size[1];
14637  work_dimension = 2;
14638  }
14639  else{
14640  size[0] = params->global_work_size[0];
14641  size[1] = params->global_work_size[1];
14642  size[2] = params->global_work_size[2];
14643  if (temp_sz > 3){
14644  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14645  }
14646  }
14647  }
14648 
14649  cl_event gpuExec;
14650 
14651  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14652 
14653  clWaitForEvents(1, &gpuExec);
14654 
14655  int *result = (int *) malloc(typesz);
14656  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14657 
14658  v->assign(result, result+sz);
14659 
14660  if (typesz2 != typesz or sz != sz2){
14661  int *result2;
14662  result2 = (int *) malloc(typesz2);
14663  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14664 
14665  v2->assign(result2, result2+sz2);
14666  free(result2);
14667  }
14668  else{
14669  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14670 
14671  v2->assign(result, result+sz2);
14672  }
14673 
14674  clReleaseCommandQueue (queue);
14675  clReleaseMemObject(buffer);
14676  clReleaseMemObject(buffer2);
14677  clReleaseMemObject(buffer3);
14678  clReleaseEvent(gpuExec);
14679  free(result);
14680  }
14681 
14682  void ROS_OpenCL::process(std::vector<int>* v, std::vector<int>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
14683  size_t sz = v->size();
14684  size_t sz2 = v2->size();
14685  size_t sz3 = v3->size();
14686  size_t typesz = sizeof(int) * sz;
14687  size_t typesz2 = sizeof(int) * sz2;
14688  size_t typesz3 = sizeof(double) * sz3;
14689  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14690  if (temp_sz > 0){
14691  if (temp_sz > 2){
14692  if (temp_sz > 3){
14693  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14694  }
14695  typesz = sizeof(int) * params->buffers_size[0];
14696  typesz2 = sizeof(int) * params->buffers_size[1];
14697  typesz3 = sizeof(double) * params->buffers_size[2];
14698  }
14699  else{
14700  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14701  }
14702  }
14703  cl_int error = 0;
14704  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14705  checkError(error);
14706  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14707  checkError(error);
14708  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
14709  checkError(error);
14710  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14711  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14712  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14713  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14714 
14715  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14716  checkError (error);
14717  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14718  checkError (error);
14719  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
14720  checkError (error);
14721 
14722  size_t size[3] = {sz, sz2, sz3};
14723  size_t work_dimension = 3;
14724 
14725  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14726  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14727  work_dimension = 1;
14728  }
14729  else if(temp_sz > 0){
14730  if (params->multi_dimensional){
14731  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14732  For default multidimensional global work size, leave the global_work_size vector empty, \
14733  and set multi_dimensional to true. Setting the global work size based on the values inside \
14734  the global_work_size vector.");
14735  }
14736  if (temp_sz == 1){
14737  size[0] = params->global_work_size[0];
14738  work_dimension = 1;
14739  }
14740  else if (temp_sz == 2){
14741  size[0] = params->global_work_size[0];
14742  size[1] = params->global_work_size[1];
14743  work_dimension = 2;
14744  }
14745  else{
14746  size[0] = params->global_work_size[0];
14747  size[1] = params->global_work_size[1];
14748  size[2] = params->global_work_size[2];
14749  if (temp_sz > 3){
14750  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14751  }
14752  }
14753  }
14754 
14755  cl_event gpuExec;
14756 
14757  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14758 
14759  clWaitForEvents(1, &gpuExec);
14760 
14761  int *result = (int *) malloc(typesz);
14762  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14763 
14764  v->assign(result, result+sz);
14765 
14766  if (typesz2 != typesz or sz != sz2){
14767  int *result2;
14768  result2 = (int *) malloc(typesz2);
14769  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14770 
14771  v2->assign(result2, result2+sz2);
14772  free(result2);
14773  }
14774  else{
14775  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14776 
14777  v2->assign(result, result+sz2);
14778  }
14779 
14780  if (typesz3 != typesz or sz != sz3){
14781  double *result3;
14782  result3 = (double *) malloc(typesz3);
14783  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
14784 
14785  v3->assign(result3, result3+sz3);
14786  free(result3);
14787  }
14788  else{
14789  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14790 
14791  v3->assign(result, result+sz3);
14792  }
14793 
14794  clReleaseCommandQueue (queue);
14795  clReleaseMemObject(buffer);
14796  clReleaseMemObject(buffer2);
14797  clReleaseMemObject(buffer3);
14798  clReleaseEvent(gpuExec);
14799  free(result);
14800  }
14801 
14802 
14803  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
14804  size_t sz = v.size();
14805  size_t sz2 = v2.size();
14806  size_t sz3 = v3.size();
14807  size_t typesz = sizeof(int) * sz;
14808  size_t typesz2 = sizeof(float) * sz2;
14809  size_t typesz3 = sizeof(char) * sz3;
14810  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14811  if (temp_sz > 0){
14812  if (temp_sz > 2){
14813  if (temp_sz > 3){
14814  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14815  }
14816  typesz = sizeof(int) * params->buffers_size[0];
14817  typesz2 = sizeof(float) * params->buffers_size[1];
14818  typesz3 = sizeof(char) * params->buffers_size[2];
14819  }
14820  else{
14821  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14822  }
14823  }
14824  cl_int error = 0;
14825  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14826  checkError(error);
14827  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14828  checkError(error);
14829  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14830  checkError(error);
14831  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14832  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14833  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14834  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14835 
14836  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14837  checkError (error);
14838  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14839  checkError (error);
14840  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14841  checkError (error);
14842 
14843  size_t size[3] = {sz, sz2, sz3};
14844  size_t work_dimension = 3;
14845 
14846  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14847  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14848  work_dimension = 1;
14849  }
14850  else if(temp_sz > 0){
14851  if (params->multi_dimensional){
14852  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14853  For default multidimensional global work size, leave the global_work_size vector empty, \
14854  and set multi_dimensional to true. Setting the global work size based on the values inside \
14855  the global_work_size vector.");
14856  }
14857  if (temp_sz == 1){
14858  size[0] = params->global_work_size[0];
14859  work_dimension = 1;
14860  }
14861  else if (temp_sz == 2){
14862  size[0] = params->global_work_size[0];
14863  size[1] = params->global_work_size[1];
14864  work_dimension = 2;
14865  }
14866  else{
14867  size[0] = params->global_work_size[0];
14868  size[1] = params->global_work_size[1];
14869  size[2] = params->global_work_size[2];
14870  if (temp_sz > 3){
14871  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14872  }
14873  }
14874  }
14875 
14876  cl_event gpuExec;
14877 
14878  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14879 
14880  clWaitForEvents(1, &gpuExec);
14881 
14882  int *result = (int *) malloc(typesz);
14883  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14884 
14885  std::vector<int> res = std::vector<int>();
14886  res.assign(result, result+sz);
14887 
14888  clReleaseCommandQueue (queue);
14889  clReleaseMemObject(buffer);
14890  clReleaseMemObject(buffer2);
14891  clReleaseMemObject(buffer3);
14892  clReleaseEvent(gpuExec);
14893  free(result);
14894 
14895  return res;
14896  }
14897 
14898  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
14899  size_t sz = v->size();
14900  size_t sz2 = v2.size();
14901  size_t sz3 = v3.size();
14902  size_t typesz = sizeof(int) * sz;
14903  size_t typesz2 = sizeof(float) * sz2;
14904  size_t typesz3 = sizeof(char) * sz3;
14905  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
14906  if (temp_sz > 0){
14907  if (temp_sz > 2){
14908  if (temp_sz > 3){
14909  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14910  }
14911  typesz = sizeof(int) * params->buffers_size[0];
14912  typesz2 = sizeof(float) * params->buffers_size[1];
14913  typesz3 = sizeof(char) * params->buffers_size[2];
14914  }
14915  else{
14916  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
14917  }
14918  }
14919  cl_int error = 0;
14920  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14921  checkError(error);
14922  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14923  checkError(error);
14924  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
14925  checkError(error);
14926  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
14927  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
14928  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
14929  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
14930 
14931  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14932  checkError (error);
14933  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14934  checkError (error);
14935  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
14936  checkError (error);
14937 
14938  size_t size[3] = {sz, sz2, sz3};
14939  size_t work_dimension = 3;
14940 
14941  temp_sz = params != NULL ? params->global_work_size.size() : 0;
14942  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
14943  work_dimension = 1;
14944  }
14945  else if(temp_sz > 0){
14946  if (params->multi_dimensional){
14947  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
14948  For default multidimensional global work size, leave the global_work_size vector empty, \
14949  and set multi_dimensional to true. Setting the global work size based on the values inside \
14950  the global_work_size vector.");
14951  }
14952  if (temp_sz == 1){
14953  size[0] = params->global_work_size[0];
14954  work_dimension = 1;
14955  }
14956  else if (temp_sz == 2){
14957  size[0] = params->global_work_size[0];
14958  size[1] = params->global_work_size[1];
14959  work_dimension = 2;
14960  }
14961  else{
14962  size[0] = params->global_work_size[0];
14963  size[1] = params->global_work_size[1];
14964  size[2] = params->global_work_size[2];
14965  if (temp_sz > 3){
14966  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
14967  }
14968  }
14969  }
14970 
14971  cl_event gpuExec;
14972 
14973  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14974 
14975  clWaitForEvents(1, &gpuExec);
14976 
14977  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14978 
14979  clWaitForEvents(1, &gpuExec);
14980 
14981  int *result = (int *) malloc(typesz);
14982  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14983 
14984  v->assign(result, result+sz);
14985 
14986  clReleaseCommandQueue (queue);
14987  clReleaseMemObject(buffer);
14988  clReleaseMemObject(buffer2);
14989  clReleaseMemObject(buffer3);
14990  clReleaseEvent(gpuExec);
14991  free(result);
14992  }
14993 
14994  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
14995  size_t sz = v->size();
14996  size_t sz2 = v2->size();
14997  size_t sz3 = v3.size();
14998  size_t typesz = sizeof(int) * sz;
14999  size_t typesz2 = sizeof(float) * sz2;
15000  size_t typesz3 = sizeof(char) * sz3;
15001  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15002  if (temp_sz > 0){
15003  if (temp_sz > 2){
15004  if (temp_sz > 3){
15005  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15006  }
15007  typesz = sizeof(int) * params->buffers_size[0];
15008  typesz2 = sizeof(float) * params->buffers_size[1];
15009  typesz3 = sizeof(char) * params->buffers_size[2];
15010  }
15011  else{
15012  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15013  }
15014  }
15015  cl_int error = 0;
15016  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15017  checkError(error);
15018  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15019  checkError(error);
15020  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15021  checkError(error);
15022  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15023  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15024  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15025  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15026 
15027  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15028  checkError (error);
15029  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15030  checkError (error);
15031  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15032  checkError (error);
15033 
15034  size_t size[3] = {sz, sz2, sz3};
15035  size_t work_dimension = 3;
15036 
15037  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15038  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15039  work_dimension = 1;
15040  }
15041  else if(temp_sz > 0){
15042  if (params->multi_dimensional){
15043  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15044  For default multidimensional global work size, leave the global_work_size vector empty, \
15045  and set multi_dimensional to true. Setting the global work size based on the values inside \
15046  the global_work_size vector.");
15047  }
15048  if (temp_sz == 1){
15049  size[0] = params->global_work_size[0];
15050  work_dimension = 1;
15051  }
15052  else if (temp_sz == 2){
15053  size[0] = params->global_work_size[0];
15054  size[1] = params->global_work_size[1];
15055  work_dimension = 2;
15056  }
15057  else{
15058  size[0] = params->global_work_size[0];
15059  size[1] = params->global_work_size[1];
15060  size[2] = params->global_work_size[2];
15061  if (temp_sz > 3){
15062  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15063  }
15064  }
15065  }
15066 
15067  cl_event gpuExec;
15068 
15069  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15070 
15071  clWaitForEvents(1, &gpuExec);
15072 
15073  int *result = (int *) malloc(typesz);
15074  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15075 
15076  v->assign(result, result+sz);
15077 
15078  if (typesz2 != typesz or sz != sz2){
15079  float *result2;
15080  result2 = (float *) malloc(typesz2);
15081  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15082 
15083  v2->assign(result2, result2+sz2);
15084  free(result2);
15085  }
15086  else{
15087  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15088 
15089  v2->assign(result, result+sz2);
15090  }
15091 
15092  clReleaseCommandQueue (queue);
15093  clReleaseMemObject(buffer);
15094  clReleaseMemObject(buffer2);
15095  clReleaseMemObject(buffer3);
15096  clReleaseEvent(gpuExec);
15097  free(result);
15098  }
15099 
15100  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
15101  size_t sz = v->size();
15102  size_t sz2 = v2->size();
15103  size_t sz3 = v3->size();
15104  size_t typesz = sizeof(int) * sz;
15105  size_t typesz2 = sizeof(float) * sz2;
15106  size_t typesz3 = sizeof(char) * sz3;
15107  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15108  if (temp_sz > 0){
15109  if (temp_sz > 2){
15110  if (temp_sz > 3){
15111  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15112  }
15113  typesz = sizeof(int) * params->buffers_size[0];
15114  typesz2 = sizeof(float) * params->buffers_size[1];
15115  typesz3 = sizeof(char) * params->buffers_size[2];
15116  }
15117  else{
15118  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15119  }
15120  }
15121  cl_int error = 0;
15122  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15123  checkError(error);
15124  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15125  checkError(error);
15126  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15127  checkError(error);
15128  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15129  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15130  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15131  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15132 
15133  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15134  checkError (error);
15135  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15136  checkError (error);
15137  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15138  checkError (error);
15139 
15140  size_t size[3] = {sz, sz2, sz3};
15141  size_t work_dimension = 3;
15142 
15143  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15144  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15145  work_dimension = 1;
15146  }
15147  else if(temp_sz > 0){
15148  if (params->multi_dimensional){
15149  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15150  For default multidimensional global work size, leave the global_work_size vector empty, \
15151  and set multi_dimensional to true. Setting the global work size based on the values inside \
15152  the global_work_size vector.");
15153  }
15154  if (temp_sz == 1){
15155  size[0] = params->global_work_size[0];
15156  work_dimension = 1;
15157  }
15158  else if (temp_sz == 2){
15159  size[0] = params->global_work_size[0];
15160  size[1] = params->global_work_size[1];
15161  work_dimension = 2;
15162  }
15163  else{
15164  size[0] = params->global_work_size[0];
15165  size[1] = params->global_work_size[1];
15166  size[2] = params->global_work_size[2];
15167  if (temp_sz > 3){
15168  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15169  }
15170  }
15171  }
15172 
15173  cl_event gpuExec;
15174 
15175  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15176 
15177  clWaitForEvents(1, &gpuExec);
15178 
15179  int *result = (int *) malloc(typesz);
15180  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15181 
15182  v->assign(result, result+sz);
15183 
15184  if (typesz2 != typesz or sz != sz2){
15185  float *result2;
15186  result2 = (float *) malloc(typesz2);
15187  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15188 
15189  v2->assign(result2, result2+sz2);
15190  free(result2);
15191  }
15192  else{
15193  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15194 
15195  v2->assign(result, result+sz2);
15196  }
15197 
15198  if (typesz3 != typesz or sz != sz3){
15199  char *result3;
15200  result3 = (char *) malloc(typesz3);
15201  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
15202 
15203  v3->assign(result3, result3+sz3);
15204  free(result3);
15205  }
15206  else{
15207  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15208 
15209  v3->assign(result, result+sz3);
15210  }
15211 
15212  clReleaseCommandQueue (queue);
15213  clReleaseMemObject(buffer);
15214  clReleaseMemObject(buffer2);
15215  clReleaseMemObject(buffer3);
15216  clReleaseEvent(gpuExec);
15217  free(result);
15218  }
15219 
15220 
15221  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
15222  size_t sz = v.size();
15223  size_t sz2 = v2.size();
15224  size_t sz3 = v3.size();
15225  size_t typesz = sizeof(int) * sz;
15226  size_t typesz2 = sizeof(float) * sz2;
15227  size_t typesz3 = sizeof(int) * sz3;
15228  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15229  if (temp_sz > 0){
15230  if (temp_sz > 2){
15231  if (temp_sz > 3){
15232  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15233  }
15234  typesz = sizeof(int) * params->buffers_size[0];
15235  typesz2 = sizeof(float) * params->buffers_size[1];
15236  typesz3 = sizeof(int) * params->buffers_size[2];
15237  }
15238  else{
15239  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15240  }
15241  }
15242  cl_int error = 0;
15243  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15244  checkError(error);
15245  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15246  checkError(error);
15247  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15248  checkError(error);
15249  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15250  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15251  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15252  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15253 
15254  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
15255  checkError (error);
15256  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15257  checkError (error);
15258  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15259  checkError (error);
15260 
15261  size_t size[3] = {sz, sz2, sz3};
15262  size_t work_dimension = 3;
15263 
15264  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15265  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15266  work_dimension = 1;
15267  }
15268  else if(temp_sz > 0){
15269  if (params->multi_dimensional){
15270  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15271  For default multidimensional global work size, leave the global_work_size vector empty, \
15272  and set multi_dimensional to true. Setting the global work size based on the values inside \
15273  the global_work_size vector.");
15274  }
15275  if (temp_sz == 1){
15276  size[0] = params->global_work_size[0];
15277  work_dimension = 1;
15278  }
15279  else if (temp_sz == 2){
15280  size[0] = params->global_work_size[0];
15281  size[1] = params->global_work_size[1];
15282  work_dimension = 2;
15283  }
15284  else{
15285  size[0] = params->global_work_size[0];
15286  size[1] = params->global_work_size[1];
15287  size[2] = params->global_work_size[2];
15288  if (temp_sz > 3){
15289  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15290  }
15291  }
15292  }
15293 
15294  cl_event gpuExec;
15295 
15296  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15297 
15298  clWaitForEvents(1, &gpuExec);
15299 
15300  int *result = (int *) malloc(typesz);
15301  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15302 
15303  std::vector<int> res = std::vector<int>();
15304  res.assign(result, result+sz);
15305 
15306  clReleaseCommandQueue (queue);
15307  clReleaseMemObject(buffer);
15308  clReleaseMemObject(buffer2);
15309  clReleaseMemObject(buffer3);
15310  clReleaseEvent(gpuExec);
15311  free(result);
15312 
15313  return res;
15314  }
15315 
15316  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
15317  size_t sz = v->size();
15318  size_t sz2 = v2.size();
15319  size_t sz3 = v3.size();
15320  size_t typesz = sizeof(int) * sz;
15321  size_t typesz2 = sizeof(float) * sz2;
15322  size_t typesz3 = sizeof(int) * sz3;
15323  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15324  if (temp_sz > 0){
15325  if (temp_sz > 2){
15326  if (temp_sz > 3){
15327  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15328  }
15329  typesz = sizeof(int) * params->buffers_size[0];
15330  typesz2 = sizeof(float) * params->buffers_size[1];
15331  typesz3 = sizeof(int) * params->buffers_size[2];
15332  }
15333  else{
15334  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15335  }
15336  }
15337  cl_int error = 0;
15338  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15339  checkError(error);
15340  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15341  checkError(error);
15342  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15343  checkError(error);
15344  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15345  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15346  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15347  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15348 
15349  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15350  checkError (error);
15351  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15352  checkError (error);
15353  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15354  checkError (error);
15355 
15356  size_t size[3] = {sz, sz2, sz3};
15357  size_t work_dimension = 3;
15358 
15359  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15360  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15361  work_dimension = 1;
15362  }
15363  else if(temp_sz > 0){
15364  if (params->multi_dimensional){
15365  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15366  For default multidimensional global work size, leave the global_work_size vector empty, \
15367  and set multi_dimensional to true. Setting the global work size based on the values inside \
15368  the global_work_size vector.");
15369  }
15370  if (temp_sz == 1){
15371  size[0] = params->global_work_size[0];
15372  work_dimension = 1;
15373  }
15374  else if (temp_sz == 2){
15375  size[0] = params->global_work_size[0];
15376  size[1] = params->global_work_size[1];
15377  work_dimension = 2;
15378  }
15379  else{
15380  size[0] = params->global_work_size[0];
15381  size[1] = params->global_work_size[1];
15382  size[2] = params->global_work_size[2];
15383  if (temp_sz > 3){
15384  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15385  }
15386  }
15387  }
15388 
15389  cl_event gpuExec;
15390 
15391  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15392 
15393  clWaitForEvents(1, &gpuExec);
15394 
15395  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15396 
15397  clWaitForEvents(1, &gpuExec);
15398 
15399  int *result = (int *) malloc(typesz);
15400  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15401 
15402  v->assign(result, result+sz);
15403 
15404  clReleaseCommandQueue (queue);
15405  clReleaseMemObject(buffer);
15406  clReleaseMemObject(buffer2);
15407  clReleaseMemObject(buffer3);
15408  clReleaseEvent(gpuExec);
15409  free(result);
15410  }
15411 
15412  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
15413  size_t sz = v->size();
15414  size_t sz2 = v2->size();
15415  size_t sz3 = v3.size();
15416  size_t typesz = sizeof(int) * sz;
15417  size_t typesz2 = sizeof(float) * sz2;
15418  size_t typesz3 = sizeof(int) * sz3;
15419  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15420  if (temp_sz > 0){
15421  if (temp_sz > 2){
15422  if (temp_sz > 3){
15423  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15424  }
15425  typesz = sizeof(int) * params->buffers_size[0];
15426  typesz2 = sizeof(float) * params->buffers_size[1];
15427  typesz3 = sizeof(int) * params->buffers_size[2];
15428  }
15429  else{
15430  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15431  }
15432  }
15433  cl_int error = 0;
15434  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15435  checkError(error);
15436  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15437  checkError(error);
15438  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15439  checkError(error);
15440  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15441  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15442  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15443  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15444 
15445  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15446  checkError (error);
15447  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15448  checkError (error);
15449  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15450  checkError (error);
15451 
15452  size_t size[3] = {sz, sz2, sz3};
15453  size_t work_dimension = 3;
15454 
15455  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15456  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15457  work_dimension = 1;
15458  }
15459  else if(temp_sz > 0){
15460  if (params->multi_dimensional){
15461  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15462  For default multidimensional global work size, leave the global_work_size vector empty, \
15463  and set multi_dimensional to true. Setting the global work size based on the values inside \
15464  the global_work_size vector.");
15465  }
15466  if (temp_sz == 1){
15467  size[0] = params->global_work_size[0];
15468  work_dimension = 1;
15469  }
15470  else if (temp_sz == 2){
15471  size[0] = params->global_work_size[0];
15472  size[1] = params->global_work_size[1];
15473  work_dimension = 2;
15474  }
15475  else{
15476  size[0] = params->global_work_size[0];
15477  size[1] = params->global_work_size[1];
15478  size[2] = params->global_work_size[2];
15479  if (temp_sz > 3){
15480  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15481  }
15482  }
15483  }
15484 
15485  cl_event gpuExec;
15486 
15487  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15488 
15489  clWaitForEvents(1, &gpuExec);
15490 
15491  int *result = (int *) malloc(typesz);
15492  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15493 
15494  v->assign(result, result+sz);
15495 
15496  if (typesz2 != typesz or sz != sz2){
15497  float *result2;
15498  result2 = (float *) malloc(typesz2);
15499  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15500 
15501  v2->assign(result2, result2+sz2);
15502  free(result2);
15503  }
15504  else{
15505  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15506 
15507  v2->assign(result, result+sz2);
15508  }
15509 
15510  clReleaseCommandQueue (queue);
15511  clReleaseMemObject(buffer);
15512  clReleaseMemObject(buffer2);
15513  clReleaseMemObject(buffer3);
15514  clReleaseEvent(gpuExec);
15515  free(result);
15516  }
15517 
15518  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
15519  size_t sz = v->size();
15520  size_t sz2 = v2->size();
15521  size_t sz3 = v3->size();
15522  size_t typesz = sizeof(int) * sz;
15523  size_t typesz2 = sizeof(float) * sz2;
15524  size_t typesz3 = sizeof(int) * sz3;
15525  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15526  if (temp_sz > 0){
15527  if (temp_sz > 2){
15528  if (temp_sz > 3){
15529  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15530  }
15531  typesz = sizeof(int) * params->buffers_size[0];
15532  typesz2 = sizeof(float) * params->buffers_size[1];
15533  typesz3 = sizeof(int) * params->buffers_size[2];
15534  }
15535  else{
15536  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15537  }
15538  }
15539  cl_int error = 0;
15540  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15541  checkError(error);
15542  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15543  checkError(error);
15544  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15545  checkError(error);
15546  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15547  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15548  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15549  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15550 
15551  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15552  checkError (error);
15553  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15554  checkError (error);
15555  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15556  checkError (error);
15557 
15558  size_t size[3] = {sz, sz2, sz3};
15559  size_t work_dimension = 3;
15560 
15561  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15562  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15563  work_dimension = 1;
15564  }
15565  else if(temp_sz > 0){
15566  if (params->multi_dimensional){
15567  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15568  For default multidimensional global work size, leave the global_work_size vector empty, \
15569  and set multi_dimensional to true. Setting the global work size based on the values inside \
15570  the global_work_size vector.");
15571  }
15572  if (temp_sz == 1){
15573  size[0] = params->global_work_size[0];
15574  work_dimension = 1;
15575  }
15576  else if (temp_sz == 2){
15577  size[0] = params->global_work_size[0];
15578  size[1] = params->global_work_size[1];
15579  work_dimension = 2;
15580  }
15581  else{
15582  size[0] = params->global_work_size[0];
15583  size[1] = params->global_work_size[1];
15584  size[2] = params->global_work_size[2];
15585  if (temp_sz > 3){
15586  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15587  }
15588  }
15589  }
15590 
15591  cl_event gpuExec;
15592 
15593  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15594 
15595  clWaitForEvents(1, &gpuExec);
15596 
15597  int *result = (int *) malloc(typesz);
15598  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15599 
15600  v->assign(result, result+sz);
15601 
15602  if (typesz2 != typesz or sz != sz2){
15603  float *result2;
15604  result2 = (float *) malloc(typesz2);
15605  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15606 
15607  v2->assign(result2, result2+sz2);
15608  free(result2);
15609  }
15610  else{
15611  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15612 
15613  v2->assign(result, result+sz2);
15614  }
15615 
15616  if (typesz3 != typesz or sz != sz3){
15617  int *result3;
15618  result3 = (int *) malloc(typesz3);
15619  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
15620 
15621  v3->assign(result3, result3+sz3);
15622  free(result3);
15623  }
15624  else{
15625  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15626 
15627  v3->assign(result, result+sz3);
15628  }
15629 
15630  clReleaseCommandQueue (queue);
15631  clReleaseMemObject(buffer);
15632  clReleaseMemObject(buffer2);
15633  clReleaseMemObject(buffer3);
15634  clReleaseEvent(gpuExec);
15635  free(result);
15636  }
15637 
15638 
15639  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
15640  size_t sz = v.size();
15641  size_t sz2 = v2.size();
15642  size_t sz3 = v3.size();
15643  size_t typesz = sizeof(int) * sz;
15644  size_t typesz2 = sizeof(float) * sz2;
15645  size_t typesz3 = sizeof(float) * sz3;
15646  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15647  if (temp_sz > 0){
15648  if (temp_sz > 2){
15649  if (temp_sz > 3){
15650  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15651  }
15652  typesz = sizeof(int) * params->buffers_size[0];
15653  typesz2 = sizeof(float) * params->buffers_size[1];
15654  typesz3 = sizeof(float) * params->buffers_size[2];
15655  }
15656  else{
15657  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15658  }
15659  }
15660  cl_int error = 0;
15661  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15662  checkError(error);
15663  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15664  checkError(error);
15665  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15666  checkError(error);
15667  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15668  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15669  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15670  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15671 
15672  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
15673  checkError (error);
15674  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15675  checkError (error);
15676  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15677  checkError (error);
15678 
15679  size_t size[3] = {sz, sz2, sz3};
15680  size_t work_dimension = 3;
15681 
15682  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15683  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15684  work_dimension = 1;
15685  }
15686  else if(temp_sz > 0){
15687  if (params->multi_dimensional){
15688  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15689  For default multidimensional global work size, leave the global_work_size vector empty, \
15690  and set multi_dimensional to true. Setting the global work size based on the values inside \
15691  the global_work_size vector.");
15692  }
15693  if (temp_sz == 1){
15694  size[0] = params->global_work_size[0];
15695  work_dimension = 1;
15696  }
15697  else if (temp_sz == 2){
15698  size[0] = params->global_work_size[0];
15699  size[1] = params->global_work_size[1];
15700  work_dimension = 2;
15701  }
15702  else{
15703  size[0] = params->global_work_size[0];
15704  size[1] = params->global_work_size[1];
15705  size[2] = params->global_work_size[2];
15706  if (temp_sz > 3){
15707  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15708  }
15709  }
15710  }
15711 
15712  cl_event gpuExec;
15713 
15714  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15715 
15716  clWaitForEvents(1, &gpuExec);
15717 
15718  int *result = (int *) malloc(typesz);
15719  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15720 
15721  std::vector<int> res = std::vector<int>();
15722  res.assign(result, result+sz);
15723 
15724  clReleaseCommandQueue (queue);
15725  clReleaseMemObject(buffer);
15726  clReleaseMemObject(buffer2);
15727  clReleaseMemObject(buffer3);
15728  clReleaseEvent(gpuExec);
15729  free(result);
15730 
15731  return res;
15732  }
15733 
15734  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
15735  size_t sz = v->size();
15736  size_t sz2 = v2.size();
15737  size_t sz3 = v3.size();
15738  size_t typesz = sizeof(int) * sz;
15739  size_t typesz2 = sizeof(float) * sz2;
15740  size_t typesz3 = sizeof(float) * sz3;
15741  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15742  if (temp_sz > 0){
15743  if (temp_sz > 2){
15744  if (temp_sz > 3){
15745  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15746  }
15747  typesz = sizeof(int) * params->buffers_size[0];
15748  typesz2 = sizeof(float) * params->buffers_size[1];
15749  typesz3 = sizeof(float) * params->buffers_size[2];
15750  }
15751  else{
15752  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15753  }
15754  }
15755  cl_int error = 0;
15756  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15757  checkError(error);
15758  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15759  checkError(error);
15760  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15761  checkError(error);
15762  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15763  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15764  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15765  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15766 
15767  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15768  checkError (error);
15769  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15770  checkError (error);
15771  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15772  checkError (error);
15773 
15774  size_t size[3] = {sz, sz2, sz3};
15775  size_t work_dimension = 3;
15776 
15777  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15778  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15779  work_dimension = 1;
15780  }
15781  else if(temp_sz > 0){
15782  if (params->multi_dimensional){
15783  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15784  For default multidimensional global work size, leave the global_work_size vector empty, \
15785  and set multi_dimensional to true. Setting the global work size based on the values inside \
15786  the global_work_size vector.");
15787  }
15788  if (temp_sz == 1){
15789  size[0] = params->global_work_size[0];
15790  work_dimension = 1;
15791  }
15792  else if (temp_sz == 2){
15793  size[0] = params->global_work_size[0];
15794  size[1] = params->global_work_size[1];
15795  work_dimension = 2;
15796  }
15797  else{
15798  size[0] = params->global_work_size[0];
15799  size[1] = params->global_work_size[1];
15800  size[2] = params->global_work_size[2];
15801  if (temp_sz > 3){
15802  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15803  }
15804  }
15805  }
15806 
15807  cl_event gpuExec;
15808 
15809  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15810 
15811  clWaitForEvents(1, &gpuExec);
15812 
15813  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15814 
15815  clWaitForEvents(1, &gpuExec);
15816 
15817  int *result = (int *) malloc(typesz);
15818  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15819 
15820  v->assign(result, result+sz);
15821 
15822  clReleaseCommandQueue (queue);
15823  clReleaseMemObject(buffer);
15824  clReleaseMemObject(buffer2);
15825  clReleaseMemObject(buffer3);
15826  clReleaseEvent(gpuExec);
15827  free(result);
15828  }
15829 
15830  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
15831  size_t sz = v->size();
15832  size_t sz2 = v2->size();
15833  size_t sz3 = v3.size();
15834  size_t typesz = sizeof(int) * sz;
15835  size_t typesz2 = sizeof(float) * sz2;
15836  size_t typesz3 = sizeof(float) * sz3;
15837  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15838  if (temp_sz > 0){
15839  if (temp_sz > 2){
15840  if (temp_sz > 3){
15841  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15842  }
15843  typesz = sizeof(int) * params->buffers_size[0];
15844  typesz2 = sizeof(float) * params->buffers_size[1];
15845  typesz3 = sizeof(float) * params->buffers_size[2];
15846  }
15847  else{
15848  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15849  }
15850  }
15851  cl_int error = 0;
15852  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15853  checkError(error);
15854  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15855  checkError(error);
15856  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15857  checkError(error);
15858  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15859  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15860  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15861  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15862 
15863  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15864  checkError (error);
15865  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15866  checkError (error);
15867  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15868  checkError (error);
15869 
15870  size_t size[3] = {sz, sz2, sz3};
15871  size_t work_dimension = 3;
15872 
15873  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15874  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15875  work_dimension = 1;
15876  }
15877  else if(temp_sz > 0){
15878  if (params->multi_dimensional){
15879  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15880  For default multidimensional global work size, leave the global_work_size vector empty, \
15881  and set multi_dimensional to true. Setting the global work size based on the values inside \
15882  the global_work_size vector.");
15883  }
15884  if (temp_sz == 1){
15885  size[0] = params->global_work_size[0];
15886  work_dimension = 1;
15887  }
15888  else if (temp_sz == 2){
15889  size[0] = params->global_work_size[0];
15890  size[1] = params->global_work_size[1];
15891  work_dimension = 2;
15892  }
15893  else{
15894  size[0] = params->global_work_size[0];
15895  size[1] = params->global_work_size[1];
15896  size[2] = params->global_work_size[2];
15897  if (temp_sz > 3){
15898  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15899  }
15900  }
15901  }
15902 
15903  cl_event gpuExec;
15904 
15905  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15906 
15907  clWaitForEvents(1, &gpuExec);
15908 
15909  int *result = (int *) malloc(typesz);
15910  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15911 
15912  v->assign(result, result+sz);
15913 
15914  if (typesz2 != typesz or sz != sz2){
15915  float *result2;
15916  result2 = (float *) malloc(typesz2);
15917  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15918 
15919  v2->assign(result2, result2+sz2);
15920  free(result2);
15921  }
15922  else{
15923  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15924 
15925  v2->assign(result, result+sz2);
15926  }
15927 
15928  clReleaseCommandQueue (queue);
15929  clReleaseMemObject(buffer);
15930  clReleaseMemObject(buffer2);
15931  clReleaseMemObject(buffer3);
15932  clReleaseEvent(gpuExec);
15933  free(result);
15934  }
15935 
15936  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
15937  size_t sz = v->size();
15938  size_t sz2 = v2->size();
15939  size_t sz3 = v3->size();
15940  size_t typesz = sizeof(int) * sz;
15941  size_t typesz2 = sizeof(float) * sz2;
15942  size_t typesz3 = sizeof(float) * sz3;
15943  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
15944  if (temp_sz > 0){
15945  if (temp_sz > 2){
15946  if (temp_sz > 3){
15947  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15948  }
15949  typesz = sizeof(int) * params->buffers_size[0];
15950  typesz2 = sizeof(float) * params->buffers_size[1];
15951  typesz3 = sizeof(float) * params->buffers_size[2];
15952  }
15953  else{
15954  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15955  }
15956  }
15957  cl_int error = 0;
15958  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15959  checkError(error);
15960  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15961  checkError(error);
15962  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15963  checkError(error);
15964  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
15965  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
15966  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
15967  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
15968 
15969  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15970  checkError (error);
15971  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15972  checkError (error);
15973  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15974  checkError (error);
15975 
15976  size_t size[3] = {sz, sz2, sz3};
15977  size_t work_dimension = 3;
15978 
15979  temp_sz = params != NULL ? params->global_work_size.size() : 0;
15980  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
15981  work_dimension = 1;
15982  }
15983  else if(temp_sz > 0){
15984  if (params->multi_dimensional){
15985  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
15986  For default multidimensional global work size, leave the global_work_size vector empty, \
15987  and set multi_dimensional to true. Setting the global work size based on the values inside \
15988  the global_work_size vector.");
15989  }
15990  if (temp_sz == 1){
15991  size[0] = params->global_work_size[0];
15992  work_dimension = 1;
15993  }
15994  else if (temp_sz == 2){
15995  size[0] = params->global_work_size[0];
15996  size[1] = params->global_work_size[1];
15997  work_dimension = 2;
15998  }
15999  else{
16000  size[0] = params->global_work_size[0];
16001  size[1] = params->global_work_size[1];
16002  size[2] = params->global_work_size[2];
16003  if (temp_sz > 3){
16004  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16005  }
16006  }
16007  }
16008 
16009  cl_event gpuExec;
16010 
16011  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16012 
16013  clWaitForEvents(1, &gpuExec);
16014 
16015  int *result = (int *) malloc(typesz);
16016  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16017 
16018  v->assign(result, result+sz);
16019 
16020  if (typesz2 != typesz or sz != sz2){
16021  float *result2;
16022  result2 = (float *) malloc(typesz2);
16023  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16024 
16025  v2->assign(result2, result2+sz2);
16026  free(result2);
16027  }
16028  else{
16029  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16030 
16031  v2->assign(result, result+sz2);
16032  }
16033 
16034  if (typesz3 != typesz or sz != sz3){
16035  float *result3;
16036  result3 = (float *) malloc(typesz3);
16037  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16038 
16039  v3->assign(result3, result3+sz3);
16040  free(result3);
16041  }
16042  else{
16043  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16044 
16045  v3->assign(result, result+sz3);
16046  }
16047 
16048  clReleaseCommandQueue (queue);
16049  clReleaseMemObject(buffer);
16050  clReleaseMemObject(buffer2);
16051  clReleaseMemObject(buffer3);
16052  clReleaseEvent(gpuExec);
16053  free(result);
16054  }
16055 
16056 
16057  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
16058  size_t sz = v.size();
16059  size_t sz2 = v2.size();
16060  size_t sz3 = v3.size();
16061  size_t typesz = sizeof(int) * sz;
16062  size_t typesz2 = sizeof(float) * sz2;
16063  size_t typesz3 = sizeof(double) * sz3;
16064  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16065  if (temp_sz > 0){
16066  if (temp_sz > 2){
16067  if (temp_sz > 3){
16068  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16069  }
16070  typesz = sizeof(int) * params->buffers_size[0];
16071  typesz2 = sizeof(float) * params->buffers_size[1];
16072  typesz3 = sizeof(double) * params->buffers_size[2];
16073  }
16074  else{
16075  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16076  }
16077  }
16078  cl_int error = 0;
16079  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16080  checkError(error);
16081  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16082  checkError(error);
16083  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16084  checkError(error);
16085  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16086  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16087  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16088  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16089 
16090  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16091  checkError (error);
16092  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16093  checkError (error);
16094  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16095  checkError (error);
16096 
16097  size_t size[3] = {sz, sz2, sz3};
16098  size_t work_dimension = 3;
16099 
16100  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16101  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16102  work_dimension = 1;
16103  }
16104  else if(temp_sz > 0){
16105  if (params->multi_dimensional){
16106  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16107  For default multidimensional global work size, leave the global_work_size vector empty, \
16108  and set multi_dimensional to true. Setting the global work size based on the values inside \
16109  the global_work_size vector.");
16110  }
16111  if (temp_sz == 1){
16112  size[0] = params->global_work_size[0];
16113  work_dimension = 1;
16114  }
16115  else if (temp_sz == 2){
16116  size[0] = params->global_work_size[0];
16117  size[1] = params->global_work_size[1];
16118  work_dimension = 2;
16119  }
16120  else{
16121  size[0] = params->global_work_size[0];
16122  size[1] = params->global_work_size[1];
16123  size[2] = params->global_work_size[2];
16124  if (temp_sz > 3){
16125  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16126  }
16127  }
16128  }
16129 
16130  cl_event gpuExec;
16131 
16132  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16133 
16134  clWaitForEvents(1, &gpuExec);
16135 
16136  int *result = (int *) malloc(typesz);
16137  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16138 
16139  std::vector<int> res = std::vector<int>();
16140  res.assign(result, result+sz);
16141 
16142  clReleaseCommandQueue (queue);
16143  clReleaseMemObject(buffer);
16144  clReleaseMemObject(buffer2);
16145  clReleaseMemObject(buffer3);
16146  clReleaseEvent(gpuExec);
16147  free(result);
16148 
16149  return res;
16150  }
16151 
16152  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
16153  size_t sz = v->size();
16154  size_t sz2 = v2.size();
16155  size_t sz3 = v3.size();
16156  size_t typesz = sizeof(int) * sz;
16157  size_t typesz2 = sizeof(float) * sz2;
16158  size_t typesz3 = sizeof(double) * sz3;
16159  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16160  if (temp_sz > 0){
16161  if (temp_sz > 2){
16162  if (temp_sz > 3){
16163  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16164  }
16165  typesz = sizeof(int) * params->buffers_size[0];
16166  typesz2 = sizeof(float) * params->buffers_size[1];
16167  typesz3 = sizeof(double) * params->buffers_size[2];
16168  }
16169  else{
16170  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16171  }
16172  }
16173  cl_int error = 0;
16174  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16175  checkError(error);
16176  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16177  checkError(error);
16178  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16179  checkError(error);
16180  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16181  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16182  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16183  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16184 
16185  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16186  checkError (error);
16187  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16188  checkError (error);
16189  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16190  checkError (error);
16191 
16192  size_t size[3] = {sz, sz2, sz3};
16193  size_t work_dimension = 3;
16194 
16195  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16196  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16197  work_dimension = 1;
16198  }
16199  else if(temp_sz > 0){
16200  if (params->multi_dimensional){
16201  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16202  For default multidimensional global work size, leave the global_work_size vector empty, \
16203  and set multi_dimensional to true. Setting the global work size based on the values inside \
16204  the global_work_size vector.");
16205  }
16206  if (temp_sz == 1){
16207  size[0] = params->global_work_size[0];
16208  work_dimension = 1;
16209  }
16210  else if (temp_sz == 2){
16211  size[0] = params->global_work_size[0];
16212  size[1] = params->global_work_size[1];
16213  work_dimension = 2;
16214  }
16215  else{
16216  size[0] = params->global_work_size[0];
16217  size[1] = params->global_work_size[1];
16218  size[2] = params->global_work_size[2];
16219  if (temp_sz > 3){
16220  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16221  }
16222  }
16223  }
16224 
16225  cl_event gpuExec;
16226 
16227  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16228 
16229  clWaitForEvents(1, &gpuExec);
16230 
16231  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16232 
16233  clWaitForEvents(1, &gpuExec);
16234 
16235  int *result = (int *) malloc(typesz);
16236  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16237 
16238  v->assign(result, result+sz);
16239 
16240  clReleaseCommandQueue (queue);
16241  clReleaseMemObject(buffer);
16242  clReleaseMemObject(buffer2);
16243  clReleaseMemObject(buffer3);
16244  clReleaseEvent(gpuExec);
16245  free(result);
16246  }
16247 
16248  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
16249  size_t sz = v->size();
16250  size_t sz2 = v2->size();
16251  size_t sz3 = v3.size();
16252  size_t typesz = sizeof(int) * sz;
16253  size_t typesz2 = sizeof(float) * sz2;
16254  size_t typesz3 = sizeof(double) * sz3;
16255  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16256  if (temp_sz > 0){
16257  if (temp_sz > 2){
16258  if (temp_sz > 3){
16259  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16260  }
16261  typesz = sizeof(int) * params->buffers_size[0];
16262  typesz2 = sizeof(float) * params->buffers_size[1];
16263  typesz3 = sizeof(double) * params->buffers_size[2];
16264  }
16265  else{
16266  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16267  }
16268  }
16269  cl_int error = 0;
16270  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16271  checkError(error);
16272  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16273  checkError(error);
16274  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16275  checkError(error);
16276  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16277  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16278  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16279  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16280 
16281  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16282  checkError (error);
16283  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16284  checkError (error);
16285  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16286  checkError (error);
16287 
16288  size_t size[3] = {sz, sz2, sz3};
16289  size_t work_dimension = 3;
16290 
16291  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16292  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16293  work_dimension = 1;
16294  }
16295  else if(temp_sz > 0){
16296  if (params->multi_dimensional){
16297  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16298  For default multidimensional global work size, leave the global_work_size vector empty, \
16299  and set multi_dimensional to true. Setting the global work size based on the values inside \
16300  the global_work_size vector.");
16301  }
16302  if (temp_sz == 1){
16303  size[0] = params->global_work_size[0];
16304  work_dimension = 1;
16305  }
16306  else if (temp_sz == 2){
16307  size[0] = params->global_work_size[0];
16308  size[1] = params->global_work_size[1];
16309  work_dimension = 2;
16310  }
16311  else{
16312  size[0] = params->global_work_size[0];
16313  size[1] = params->global_work_size[1];
16314  size[2] = params->global_work_size[2];
16315  if (temp_sz > 3){
16316  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16317  }
16318  }
16319  }
16320 
16321  cl_event gpuExec;
16322 
16323  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16324 
16325  clWaitForEvents(1, &gpuExec);
16326 
16327  int *result = (int *) malloc(typesz);
16328  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16329 
16330  v->assign(result, result+sz);
16331 
16332  if (typesz2 != typesz or sz != sz2){
16333  float *result2;
16334  result2 = (float *) malloc(typesz2);
16335  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16336 
16337  v2->assign(result2, result2+sz2);
16338  free(result2);
16339  }
16340  else{
16341  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16342 
16343  v2->assign(result, result+sz2);
16344  }
16345 
16346  clReleaseCommandQueue (queue);
16347  clReleaseMemObject(buffer);
16348  clReleaseMemObject(buffer2);
16349  clReleaseMemObject(buffer3);
16350  clReleaseEvent(gpuExec);
16351  free(result);
16352  }
16353 
16354  void ROS_OpenCL::process(std::vector<int>* v, std::vector<float>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
16355  size_t sz = v->size();
16356  size_t sz2 = v2->size();
16357  size_t sz3 = v3->size();
16358  size_t typesz = sizeof(int) * sz;
16359  size_t typesz2 = sizeof(float) * sz2;
16360  size_t typesz3 = sizeof(double) * sz3;
16361  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16362  if (temp_sz > 0){
16363  if (temp_sz > 2){
16364  if (temp_sz > 3){
16365  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16366  }
16367  typesz = sizeof(int) * params->buffers_size[0];
16368  typesz2 = sizeof(float) * params->buffers_size[1];
16369  typesz3 = sizeof(double) * params->buffers_size[2];
16370  }
16371  else{
16372  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16373  }
16374  }
16375  cl_int error = 0;
16376  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16377  checkError(error);
16378  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16379  checkError(error);
16380  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
16381  checkError(error);
16382  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16383  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16384  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16385  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16386 
16387  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16388  checkError (error);
16389  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16390  checkError (error);
16391  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
16392  checkError (error);
16393 
16394  size_t size[3] = {sz, sz2, sz3};
16395  size_t work_dimension = 3;
16396 
16397  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16398  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16399  work_dimension = 1;
16400  }
16401  else if(temp_sz > 0){
16402  if (params->multi_dimensional){
16403  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16404  For default multidimensional global work size, leave the global_work_size vector empty, \
16405  and set multi_dimensional to true. Setting the global work size based on the values inside \
16406  the global_work_size vector.");
16407  }
16408  if (temp_sz == 1){
16409  size[0] = params->global_work_size[0];
16410  work_dimension = 1;
16411  }
16412  else if (temp_sz == 2){
16413  size[0] = params->global_work_size[0];
16414  size[1] = params->global_work_size[1];
16415  work_dimension = 2;
16416  }
16417  else{
16418  size[0] = params->global_work_size[0];
16419  size[1] = params->global_work_size[1];
16420  size[2] = params->global_work_size[2];
16421  if (temp_sz > 3){
16422  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16423  }
16424  }
16425  }
16426 
16427  cl_event gpuExec;
16428 
16429  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16430 
16431  clWaitForEvents(1, &gpuExec);
16432 
16433  int *result = (int *) malloc(typesz);
16434  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16435 
16436  v->assign(result, result+sz);
16437 
16438  if (typesz2 != typesz or sz != sz2){
16439  float *result2;
16440  result2 = (float *) malloc(typesz2);
16441  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16442 
16443  v2->assign(result2, result2+sz2);
16444  free(result2);
16445  }
16446  else{
16447  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16448 
16449  v2->assign(result, result+sz2);
16450  }
16451 
16452  if (typesz3 != typesz or sz != sz3){
16453  double *result3;
16454  result3 = (double *) malloc(typesz3);
16455  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16456 
16457  v3->assign(result3, result3+sz3);
16458  free(result3);
16459  }
16460  else{
16461  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16462 
16463  v3->assign(result, result+sz3);
16464  }
16465 
16466  clReleaseCommandQueue (queue);
16467  clReleaseMemObject(buffer);
16468  clReleaseMemObject(buffer2);
16469  clReleaseMemObject(buffer3);
16470  clReleaseEvent(gpuExec);
16471  free(result);
16472  }
16473 
16474 
16475  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
16476  size_t sz = v.size();
16477  size_t sz2 = v2.size();
16478  size_t sz3 = v3.size();
16479  size_t typesz = sizeof(int) * sz;
16480  size_t typesz2 = sizeof(double) * sz2;
16481  size_t typesz3 = sizeof(char) * sz3;
16482  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16483  if (temp_sz > 0){
16484  if (temp_sz > 2){
16485  if (temp_sz > 3){
16486  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16487  }
16488  typesz = sizeof(int) * params->buffers_size[0];
16489  typesz2 = sizeof(double) * params->buffers_size[1];
16490  typesz3 = sizeof(char) * params->buffers_size[2];
16491  }
16492  else{
16493  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16494  }
16495  }
16496  cl_int error = 0;
16497  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16498  checkError(error);
16499  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16500  checkError(error);
16501  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16502  checkError(error);
16503  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16504  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16505  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16506  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16507 
16508  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16509  checkError (error);
16510  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16511  checkError (error);
16512  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16513  checkError (error);
16514 
16515  size_t size[3] = {sz, sz2, sz3};
16516  size_t work_dimension = 3;
16517 
16518  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16519  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16520  work_dimension = 1;
16521  }
16522  else if(temp_sz > 0){
16523  if (params->multi_dimensional){
16524  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16525  For default multidimensional global work size, leave the global_work_size vector empty, \
16526  and set multi_dimensional to true. Setting the global work size based on the values inside \
16527  the global_work_size vector.");
16528  }
16529  if (temp_sz == 1){
16530  size[0] = params->global_work_size[0];
16531  work_dimension = 1;
16532  }
16533  else if (temp_sz == 2){
16534  size[0] = params->global_work_size[0];
16535  size[1] = params->global_work_size[1];
16536  work_dimension = 2;
16537  }
16538  else{
16539  size[0] = params->global_work_size[0];
16540  size[1] = params->global_work_size[1];
16541  size[2] = params->global_work_size[2];
16542  if (temp_sz > 3){
16543  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16544  }
16545  }
16546  }
16547 
16548  cl_event gpuExec;
16549 
16550  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16551 
16552  clWaitForEvents(1, &gpuExec);
16553 
16554  int *result = (int *) malloc(typesz);
16555  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16556 
16557  std::vector<int> res = std::vector<int>();
16558  res.assign(result, result+sz);
16559 
16560  clReleaseCommandQueue (queue);
16561  clReleaseMemObject(buffer);
16562  clReleaseMemObject(buffer2);
16563  clReleaseMemObject(buffer3);
16564  clReleaseEvent(gpuExec);
16565  free(result);
16566 
16567  return res;
16568  }
16569 
16570  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
16571  size_t sz = v->size();
16572  size_t sz2 = v2.size();
16573  size_t sz3 = v3.size();
16574  size_t typesz = sizeof(int) * sz;
16575  size_t typesz2 = sizeof(double) * sz2;
16576  size_t typesz3 = sizeof(char) * sz3;
16577  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16578  if (temp_sz > 0){
16579  if (temp_sz > 2){
16580  if (temp_sz > 3){
16581  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16582  }
16583  typesz = sizeof(int) * params->buffers_size[0];
16584  typesz2 = sizeof(double) * params->buffers_size[1];
16585  typesz3 = sizeof(char) * params->buffers_size[2];
16586  }
16587  else{
16588  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16589  }
16590  }
16591  cl_int error = 0;
16592  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16593  checkError(error);
16594  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16595  checkError(error);
16596  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16597  checkError(error);
16598  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16599  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16600  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16601  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16602 
16603  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16604  checkError (error);
16605  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16606  checkError (error);
16607  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16608  checkError (error);
16609 
16610  size_t size[3] = {sz, sz2, sz3};
16611  size_t work_dimension = 3;
16612 
16613  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16614  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16615  work_dimension = 1;
16616  }
16617  else if(temp_sz > 0){
16618  if (params->multi_dimensional){
16619  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16620  For default multidimensional global work size, leave the global_work_size vector empty, \
16621  and set multi_dimensional to true. Setting the global work size based on the values inside \
16622  the global_work_size vector.");
16623  }
16624  if (temp_sz == 1){
16625  size[0] = params->global_work_size[0];
16626  work_dimension = 1;
16627  }
16628  else if (temp_sz == 2){
16629  size[0] = params->global_work_size[0];
16630  size[1] = params->global_work_size[1];
16631  work_dimension = 2;
16632  }
16633  else{
16634  size[0] = params->global_work_size[0];
16635  size[1] = params->global_work_size[1];
16636  size[2] = params->global_work_size[2];
16637  if (temp_sz > 3){
16638  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16639  }
16640  }
16641  }
16642 
16643  cl_event gpuExec;
16644 
16645  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16646 
16647  clWaitForEvents(1, &gpuExec);
16648 
16649  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16650 
16651  clWaitForEvents(1, &gpuExec);
16652 
16653  int *result = (int *) malloc(typesz);
16654  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16655 
16656  v->assign(result, result+sz);
16657 
16658  clReleaseCommandQueue (queue);
16659  clReleaseMemObject(buffer);
16660  clReleaseMemObject(buffer2);
16661  clReleaseMemObject(buffer3);
16662  clReleaseEvent(gpuExec);
16663  free(result);
16664  }
16665 
16666  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
16667  size_t sz = v->size();
16668  size_t sz2 = v2->size();
16669  size_t sz3 = v3.size();
16670  size_t typesz = sizeof(int) * sz;
16671  size_t typesz2 = sizeof(double) * sz2;
16672  size_t typesz3 = sizeof(char) * sz3;
16673  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16674  if (temp_sz > 0){
16675  if (temp_sz > 2){
16676  if (temp_sz > 3){
16677  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16678  }
16679  typesz = sizeof(int) * params->buffers_size[0];
16680  typesz2 = sizeof(double) * params->buffers_size[1];
16681  typesz3 = sizeof(char) * params->buffers_size[2];
16682  }
16683  else{
16684  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16685  }
16686  }
16687  cl_int error = 0;
16688  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16689  checkError(error);
16690  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16691  checkError(error);
16692  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16693  checkError(error);
16694  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16695  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16696  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16697  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16698 
16699  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16700  checkError (error);
16701  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16702  checkError (error);
16703  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16704  checkError (error);
16705 
16706  size_t size[3] = {sz, sz2, sz3};
16707  size_t work_dimension = 3;
16708 
16709  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16710  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16711  work_dimension = 1;
16712  }
16713  else if(temp_sz > 0){
16714  if (params->multi_dimensional){
16715  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16716  For default multidimensional global work size, leave the global_work_size vector empty, \
16717  and set multi_dimensional to true. Setting the global work size based on the values inside \
16718  the global_work_size vector.");
16719  }
16720  if (temp_sz == 1){
16721  size[0] = params->global_work_size[0];
16722  work_dimension = 1;
16723  }
16724  else if (temp_sz == 2){
16725  size[0] = params->global_work_size[0];
16726  size[1] = params->global_work_size[1];
16727  work_dimension = 2;
16728  }
16729  else{
16730  size[0] = params->global_work_size[0];
16731  size[1] = params->global_work_size[1];
16732  size[2] = params->global_work_size[2];
16733  if (temp_sz > 3){
16734  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16735  }
16736  }
16737  }
16738 
16739  cl_event gpuExec;
16740 
16741  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16742 
16743  clWaitForEvents(1, &gpuExec);
16744 
16745  int *result = (int *) malloc(typesz);
16746  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16747 
16748  v->assign(result, result+sz);
16749 
16750  if (typesz2 != typesz or sz != sz2){
16751  double *result2;
16752  result2 = (double *) malloc(typesz2);
16753  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16754 
16755  v2->assign(result2, result2+sz2);
16756  free(result2);
16757  }
16758  else{
16759  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16760 
16761  v2->assign(result, result+sz2);
16762  }
16763 
16764  clReleaseCommandQueue (queue);
16765  clReleaseMemObject(buffer);
16766  clReleaseMemObject(buffer2);
16767  clReleaseMemObject(buffer3);
16768  clReleaseEvent(gpuExec);
16769  free(result);
16770  }
16771 
16772  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
16773  size_t sz = v->size();
16774  size_t sz2 = v2->size();
16775  size_t sz3 = v3->size();
16776  size_t typesz = sizeof(int) * sz;
16777  size_t typesz2 = sizeof(double) * sz2;
16778  size_t typesz3 = sizeof(char) * sz3;
16779  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16780  if (temp_sz > 0){
16781  if (temp_sz > 2){
16782  if (temp_sz > 3){
16783  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16784  }
16785  typesz = sizeof(int) * params->buffers_size[0];
16786  typesz2 = sizeof(double) * params->buffers_size[1];
16787  typesz3 = sizeof(char) * params->buffers_size[2];
16788  }
16789  else{
16790  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16791  }
16792  }
16793  cl_int error = 0;
16794  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16795  checkError(error);
16796  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16797  checkError(error);
16798  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
16799  checkError(error);
16800  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16801  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16802  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16803  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16804 
16805  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16806  checkError (error);
16807  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16808  checkError (error);
16809  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
16810  checkError (error);
16811 
16812  size_t size[3] = {sz, sz2, sz3};
16813  size_t work_dimension = 3;
16814 
16815  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16816  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16817  work_dimension = 1;
16818  }
16819  else if(temp_sz > 0){
16820  if (params->multi_dimensional){
16821  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16822  For default multidimensional global work size, leave the global_work_size vector empty, \
16823  and set multi_dimensional to true. Setting the global work size based on the values inside \
16824  the global_work_size vector.");
16825  }
16826  if (temp_sz == 1){
16827  size[0] = params->global_work_size[0];
16828  work_dimension = 1;
16829  }
16830  else if (temp_sz == 2){
16831  size[0] = params->global_work_size[0];
16832  size[1] = params->global_work_size[1];
16833  work_dimension = 2;
16834  }
16835  else{
16836  size[0] = params->global_work_size[0];
16837  size[1] = params->global_work_size[1];
16838  size[2] = params->global_work_size[2];
16839  if (temp_sz > 3){
16840  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16841  }
16842  }
16843  }
16844 
16845  cl_event gpuExec;
16846 
16847  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16848 
16849  clWaitForEvents(1, &gpuExec);
16850 
16851  int *result = (int *) malloc(typesz);
16852  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16853 
16854  v->assign(result, result+sz);
16855 
16856  if (typesz2 != typesz or sz != sz2){
16857  double *result2;
16858  result2 = (double *) malloc(typesz2);
16859  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16860 
16861  v2->assign(result2, result2+sz2);
16862  free(result2);
16863  }
16864  else{
16865  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16866 
16867  v2->assign(result, result+sz2);
16868  }
16869 
16870  if (typesz3 != typesz or sz != sz3){
16871  char *result3;
16872  result3 = (char *) malloc(typesz3);
16873  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16874 
16875  v3->assign(result3, result3+sz3);
16876  free(result3);
16877  }
16878  else{
16879  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16880 
16881  v3->assign(result, result+sz3);
16882  }
16883 
16884  clReleaseCommandQueue (queue);
16885  clReleaseMemObject(buffer);
16886  clReleaseMemObject(buffer2);
16887  clReleaseMemObject(buffer3);
16888  clReleaseEvent(gpuExec);
16889  free(result);
16890  }
16891 
16892 
16893  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
16894  size_t sz = v.size();
16895  size_t sz2 = v2.size();
16896  size_t sz3 = v3.size();
16897  size_t typesz = sizeof(int) * sz;
16898  size_t typesz2 = sizeof(double) * sz2;
16899  size_t typesz3 = sizeof(int) * sz3;
16900  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16901  if (temp_sz > 0){
16902  if (temp_sz > 2){
16903  if (temp_sz > 3){
16904  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16905  }
16906  typesz = sizeof(int) * params->buffers_size[0];
16907  typesz2 = sizeof(double) * params->buffers_size[1];
16908  typesz3 = sizeof(int) * params->buffers_size[2];
16909  }
16910  else{
16911  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16912  }
16913  }
16914  cl_int error = 0;
16915  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16916  checkError(error);
16917  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16918  checkError(error);
16919  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16920  checkError(error);
16921  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
16922  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
16923  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
16924  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
16925 
16926  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16927  checkError (error);
16928  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16929  checkError (error);
16930  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16931  checkError (error);
16932 
16933  size_t size[3] = {sz, sz2, sz3};
16934  size_t work_dimension = 3;
16935 
16936  temp_sz = params != NULL ? params->global_work_size.size() : 0;
16937  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
16938  work_dimension = 1;
16939  }
16940  else if(temp_sz > 0){
16941  if (params->multi_dimensional){
16942  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
16943  For default multidimensional global work size, leave the global_work_size vector empty, \
16944  and set multi_dimensional to true. Setting the global work size based on the values inside \
16945  the global_work_size vector.");
16946  }
16947  if (temp_sz == 1){
16948  size[0] = params->global_work_size[0];
16949  work_dimension = 1;
16950  }
16951  else if (temp_sz == 2){
16952  size[0] = params->global_work_size[0];
16953  size[1] = params->global_work_size[1];
16954  work_dimension = 2;
16955  }
16956  else{
16957  size[0] = params->global_work_size[0];
16958  size[1] = params->global_work_size[1];
16959  size[2] = params->global_work_size[2];
16960  if (temp_sz > 3){
16961  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16962  }
16963  }
16964  }
16965 
16966  cl_event gpuExec;
16967 
16968  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16969 
16970  clWaitForEvents(1, &gpuExec);
16971 
16972  int *result = (int *) malloc(typesz);
16973  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16974 
16975  std::vector<int> res = std::vector<int>();
16976  res.assign(result, result+sz);
16977 
16978  clReleaseCommandQueue (queue);
16979  clReleaseMemObject(buffer);
16980  clReleaseMemObject(buffer2);
16981  clReleaseMemObject(buffer3);
16982  clReleaseEvent(gpuExec);
16983  free(result);
16984 
16985  return res;
16986  }
16987 
16988  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
16989  size_t sz = v->size();
16990  size_t sz2 = v2.size();
16991  size_t sz3 = v3.size();
16992  size_t typesz = sizeof(int) * sz;
16993  size_t typesz2 = sizeof(double) * sz2;
16994  size_t typesz3 = sizeof(int) * sz3;
16995  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
16996  if (temp_sz > 0){
16997  if (temp_sz > 2){
16998  if (temp_sz > 3){
16999  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17000  }
17001  typesz = sizeof(int) * params->buffers_size[0];
17002  typesz2 = sizeof(double) * params->buffers_size[1];
17003  typesz3 = sizeof(int) * params->buffers_size[2];
17004  }
17005  else{
17006  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17007  }
17008  }
17009  cl_int error = 0;
17010  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17011  checkError(error);
17012  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17013  checkError(error);
17014  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17015  checkError(error);
17016  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17017  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17018  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17019  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17020 
17021  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17022  checkError (error);
17023  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17024  checkError (error);
17025  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17026  checkError (error);
17027 
17028  size_t size[3] = {sz, sz2, sz3};
17029  size_t work_dimension = 3;
17030 
17031  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17032  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17033  work_dimension = 1;
17034  }
17035  else if(temp_sz > 0){
17036  if (params->multi_dimensional){
17037  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17038  For default multidimensional global work size, leave the global_work_size vector empty, \
17039  and set multi_dimensional to true. Setting the global work size based on the values inside \
17040  the global_work_size vector.");
17041  }
17042  if (temp_sz == 1){
17043  size[0] = params->global_work_size[0];
17044  work_dimension = 1;
17045  }
17046  else if (temp_sz == 2){
17047  size[0] = params->global_work_size[0];
17048  size[1] = params->global_work_size[1];
17049  work_dimension = 2;
17050  }
17051  else{
17052  size[0] = params->global_work_size[0];
17053  size[1] = params->global_work_size[1];
17054  size[2] = params->global_work_size[2];
17055  if (temp_sz > 3){
17056  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17057  }
17058  }
17059  }
17060 
17061  cl_event gpuExec;
17062 
17063  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17064 
17065  clWaitForEvents(1, &gpuExec);
17066 
17067  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17068 
17069  clWaitForEvents(1, &gpuExec);
17070 
17071  int *result = (int *) malloc(typesz);
17072  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17073 
17074  v->assign(result, result+sz);
17075 
17076  clReleaseCommandQueue (queue);
17077  clReleaseMemObject(buffer);
17078  clReleaseMemObject(buffer2);
17079  clReleaseMemObject(buffer3);
17080  clReleaseEvent(gpuExec);
17081  free(result);
17082  }
17083 
17084  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
17085  size_t sz = v->size();
17086  size_t sz2 = v2->size();
17087  size_t sz3 = v3.size();
17088  size_t typesz = sizeof(int) * sz;
17089  size_t typesz2 = sizeof(double) * sz2;
17090  size_t typesz3 = sizeof(int) * sz3;
17091  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17092  if (temp_sz > 0){
17093  if (temp_sz > 2){
17094  if (temp_sz > 3){
17095  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17096  }
17097  typesz = sizeof(int) * params->buffers_size[0];
17098  typesz2 = sizeof(double) * params->buffers_size[1];
17099  typesz3 = sizeof(int) * params->buffers_size[2];
17100  }
17101  else{
17102  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17103  }
17104  }
17105  cl_int error = 0;
17106  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17107  checkError(error);
17108  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17109  checkError(error);
17110  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17111  checkError(error);
17112  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17113  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17114  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17115  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17116 
17117  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17118  checkError (error);
17119  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17120  checkError (error);
17121  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17122  checkError (error);
17123 
17124  size_t size[3] = {sz, sz2, sz3};
17125  size_t work_dimension = 3;
17126 
17127  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17128  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17129  work_dimension = 1;
17130  }
17131  else if(temp_sz > 0){
17132  if (params->multi_dimensional){
17133  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17134  For default multidimensional global work size, leave the global_work_size vector empty, \
17135  and set multi_dimensional to true. Setting the global work size based on the values inside \
17136  the global_work_size vector.");
17137  }
17138  if (temp_sz == 1){
17139  size[0] = params->global_work_size[0];
17140  work_dimension = 1;
17141  }
17142  else if (temp_sz == 2){
17143  size[0] = params->global_work_size[0];
17144  size[1] = params->global_work_size[1];
17145  work_dimension = 2;
17146  }
17147  else{
17148  size[0] = params->global_work_size[0];
17149  size[1] = params->global_work_size[1];
17150  size[2] = params->global_work_size[2];
17151  if (temp_sz > 3){
17152  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17153  }
17154  }
17155  }
17156 
17157  cl_event gpuExec;
17158 
17159  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17160 
17161  clWaitForEvents(1, &gpuExec);
17162 
17163  int *result = (int *) malloc(typesz);
17164  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17165 
17166  v->assign(result, result+sz);
17167 
17168  if (typesz2 != typesz or sz != sz2){
17169  double *result2;
17170  result2 = (double *) malloc(typesz2);
17171  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17172 
17173  v2->assign(result2, result2+sz2);
17174  free(result2);
17175  }
17176  else{
17177  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17178 
17179  v2->assign(result, result+sz2);
17180  }
17181 
17182  clReleaseCommandQueue (queue);
17183  clReleaseMemObject(buffer);
17184  clReleaseMemObject(buffer2);
17185  clReleaseMemObject(buffer3);
17186  clReleaseEvent(gpuExec);
17187  free(result);
17188  }
17189 
17190  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
17191  size_t sz = v->size();
17192  size_t sz2 = v2->size();
17193  size_t sz3 = v3->size();
17194  size_t typesz = sizeof(int) * sz;
17195  size_t typesz2 = sizeof(double) * sz2;
17196  size_t typesz3 = sizeof(int) * sz3;
17197  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17198  if (temp_sz > 0){
17199  if (temp_sz > 2){
17200  if (temp_sz > 3){
17201  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17202  }
17203  typesz = sizeof(int) * params->buffers_size[0];
17204  typesz2 = sizeof(double) * params->buffers_size[1];
17205  typesz3 = sizeof(int) * params->buffers_size[2];
17206  }
17207  else{
17208  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17209  }
17210  }
17211  cl_int error = 0;
17212  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17213  checkError(error);
17214  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17215  checkError(error);
17216  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
17217  checkError(error);
17218  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17219  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17220  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17221  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17222 
17223  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17224  checkError (error);
17225  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17226  checkError (error);
17227  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
17228  checkError (error);
17229 
17230  size_t size[3] = {sz, sz2, sz3};
17231  size_t work_dimension = 3;
17232 
17233  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17234  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17235  work_dimension = 1;
17236  }
17237  else if(temp_sz > 0){
17238  if (params->multi_dimensional){
17239  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17240  For default multidimensional global work size, leave the global_work_size vector empty, \
17241  and set multi_dimensional to true. Setting the global work size based on the values inside \
17242  the global_work_size vector.");
17243  }
17244  if (temp_sz == 1){
17245  size[0] = params->global_work_size[0];
17246  work_dimension = 1;
17247  }
17248  else if (temp_sz == 2){
17249  size[0] = params->global_work_size[0];
17250  size[1] = params->global_work_size[1];
17251  work_dimension = 2;
17252  }
17253  else{
17254  size[0] = params->global_work_size[0];
17255  size[1] = params->global_work_size[1];
17256  size[2] = params->global_work_size[2];
17257  if (temp_sz > 3){
17258  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17259  }
17260  }
17261  }
17262 
17263  cl_event gpuExec;
17264 
17265  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17266 
17267  clWaitForEvents(1, &gpuExec);
17268 
17269  int *result = (int *) malloc(typesz);
17270  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17271 
17272  v->assign(result, result+sz);
17273 
17274  if (typesz2 != typesz or sz != sz2){
17275  double *result2;
17276  result2 = (double *) malloc(typesz2);
17277  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17278 
17279  v2->assign(result2, result2+sz2);
17280  free(result2);
17281  }
17282  else{
17283  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17284 
17285  v2->assign(result, result+sz2);
17286  }
17287 
17288  if (typesz3 != typesz or sz != sz3){
17289  int *result3;
17290  result3 = (int *) malloc(typesz3);
17291  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
17292 
17293  v3->assign(result3, result3+sz3);
17294  free(result3);
17295  }
17296  else{
17297  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17298 
17299  v3->assign(result, result+sz3);
17300  }
17301 
17302  clReleaseCommandQueue (queue);
17303  clReleaseMemObject(buffer);
17304  clReleaseMemObject(buffer2);
17305  clReleaseMemObject(buffer3);
17306  clReleaseEvent(gpuExec);
17307  free(result);
17308  }
17309 
17310 
17311  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
17312  size_t sz = v.size();
17313  size_t sz2 = v2.size();
17314  size_t sz3 = v3.size();
17315  size_t typesz = sizeof(int) * sz;
17316  size_t typesz2 = sizeof(double) * sz2;
17317  size_t typesz3 = sizeof(float) * sz3;
17318  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17319  if (temp_sz > 0){
17320  if (temp_sz > 2){
17321  if (temp_sz > 3){
17322  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17323  }
17324  typesz = sizeof(int) * params->buffers_size[0];
17325  typesz2 = sizeof(double) * params->buffers_size[1];
17326  typesz3 = sizeof(float) * params->buffers_size[2];
17327  }
17328  else{
17329  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17330  }
17331  }
17332  cl_int error = 0;
17333  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17334  checkError(error);
17335  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17336  checkError(error);
17337  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17338  checkError(error);
17339  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17340  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17341  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17342  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17343 
17344  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
17345  checkError (error);
17346  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17347  checkError (error);
17348  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17349  checkError (error);
17350 
17351  size_t size[3] = {sz, sz2, sz3};
17352  size_t work_dimension = 3;
17353 
17354  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17355  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17356  work_dimension = 1;
17357  }
17358  else if(temp_sz > 0){
17359  if (params->multi_dimensional){
17360  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17361  For default multidimensional global work size, leave the global_work_size vector empty, \
17362  and set multi_dimensional to true. Setting the global work size based on the values inside \
17363  the global_work_size vector.");
17364  }
17365  if (temp_sz == 1){
17366  size[0] = params->global_work_size[0];
17367  work_dimension = 1;
17368  }
17369  else if (temp_sz == 2){
17370  size[0] = params->global_work_size[0];
17371  size[1] = params->global_work_size[1];
17372  work_dimension = 2;
17373  }
17374  else{
17375  size[0] = params->global_work_size[0];
17376  size[1] = params->global_work_size[1];
17377  size[2] = params->global_work_size[2];
17378  if (temp_sz > 3){
17379  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17380  }
17381  }
17382  }
17383 
17384  cl_event gpuExec;
17385 
17386  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17387 
17388  clWaitForEvents(1, &gpuExec);
17389 
17390  int *result = (int *) malloc(typesz);
17391  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17392 
17393  std::vector<int> res = std::vector<int>();
17394  res.assign(result, result+sz);
17395 
17396  clReleaseCommandQueue (queue);
17397  clReleaseMemObject(buffer);
17398  clReleaseMemObject(buffer2);
17399  clReleaseMemObject(buffer3);
17400  clReleaseEvent(gpuExec);
17401  free(result);
17402 
17403  return res;
17404  }
17405 
17406  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
17407  size_t sz = v->size();
17408  size_t sz2 = v2.size();
17409  size_t sz3 = v3.size();
17410  size_t typesz = sizeof(int) * sz;
17411  size_t typesz2 = sizeof(double) * sz2;
17412  size_t typesz3 = sizeof(float) * sz3;
17413  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17414  if (temp_sz > 0){
17415  if (temp_sz > 2){
17416  if (temp_sz > 3){
17417  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17418  }
17419  typesz = sizeof(int) * params->buffers_size[0];
17420  typesz2 = sizeof(double) * params->buffers_size[1];
17421  typesz3 = sizeof(float) * params->buffers_size[2];
17422  }
17423  else{
17424  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17425  }
17426  }
17427  cl_int error = 0;
17428  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17429  checkError(error);
17430  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17431  checkError(error);
17432  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17433  checkError(error);
17434  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17435  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17436  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17437  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17438 
17439  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17440  checkError (error);
17441  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17442  checkError (error);
17443  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17444  checkError (error);
17445 
17446  size_t size[3] = {sz, sz2, sz3};
17447  size_t work_dimension = 3;
17448 
17449  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17450  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17451  work_dimension = 1;
17452  }
17453  else if(temp_sz > 0){
17454  if (params->multi_dimensional){
17455  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17456  For default multidimensional global work size, leave the global_work_size vector empty, \
17457  and set multi_dimensional to true. Setting the global work size based on the values inside \
17458  the global_work_size vector.");
17459  }
17460  if (temp_sz == 1){
17461  size[0] = params->global_work_size[0];
17462  work_dimension = 1;
17463  }
17464  else if (temp_sz == 2){
17465  size[0] = params->global_work_size[0];
17466  size[1] = params->global_work_size[1];
17467  work_dimension = 2;
17468  }
17469  else{
17470  size[0] = params->global_work_size[0];
17471  size[1] = params->global_work_size[1];
17472  size[2] = params->global_work_size[2];
17473  if (temp_sz > 3){
17474  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17475  }
17476  }
17477  }
17478 
17479  cl_event gpuExec;
17480 
17481  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17482 
17483  clWaitForEvents(1, &gpuExec);
17484 
17485  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17486 
17487  clWaitForEvents(1, &gpuExec);
17488 
17489  int *result = (int *) malloc(typesz);
17490  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17491 
17492  v->assign(result, result+sz);
17493 
17494  clReleaseCommandQueue (queue);
17495  clReleaseMemObject(buffer);
17496  clReleaseMemObject(buffer2);
17497  clReleaseMemObject(buffer3);
17498  clReleaseEvent(gpuExec);
17499  free(result);
17500  }
17501 
17502  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
17503  size_t sz = v->size();
17504  size_t sz2 = v2->size();
17505  size_t sz3 = v3.size();
17506  size_t typesz = sizeof(int) * sz;
17507  size_t typesz2 = sizeof(double) * sz2;
17508  size_t typesz3 = sizeof(float) * sz3;
17509  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17510  if (temp_sz > 0){
17511  if (temp_sz > 2){
17512  if (temp_sz > 3){
17513  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17514  }
17515  typesz = sizeof(int) * params->buffers_size[0];
17516  typesz2 = sizeof(double) * params->buffers_size[1];
17517  typesz3 = sizeof(float) * params->buffers_size[2];
17518  }
17519  else{
17520  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17521  }
17522  }
17523  cl_int error = 0;
17524  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17525  checkError(error);
17526  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17527  checkError(error);
17528  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17529  checkError(error);
17530  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17531  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17532  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17533  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17534 
17535  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17536  checkError (error);
17537  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17538  checkError (error);
17539  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17540  checkError (error);
17541 
17542  size_t size[3] = {sz, sz2, sz3};
17543  size_t work_dimension = 3;
17544 
17545  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17546  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17547  work_dimension = 1;
17548  }
17549  else if(temp_sz > 0){
17550  if (params->multi_dimensional){
17551  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17552  For default multidimensional global work size, leave the global_work_size vector empty, \
17553  and set multi_dimensional to true. Setting the global work size based on the values inside \
17554  the global_work_size vector.");
17555  }
17556  if (temp_sz == 1){
17557  size[0] = params->global_work_size[0];
17558  work_dimension = 1;
17559  }
17560  else if (temp_sz == 2){
17561  size[0] = params->global_work_size[0];
17562  size[1] = params->global_work_size[1];
17563  work_dimension = 2;
17564  }
17565  else{
17566  size[0] = params->global_work_size[0];
17567  size[1] = params->global_work_size[1];
17568  size[2] = params->global_work_size[2];
17569  if (temp_sz > 3){
17570  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17571  }
17572  }
17573  }
17574 
17575  cl_event gpuExec;
17576 
17577  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17578 
17579  clWaitForEvents(1, &gpuExec);
17580 
17581  int *result = (int *) malloc(typesz);
17582  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17583 
17584  v->assign(result, result+sz);
17585 
17586  if (typesz2 != typesz or sz != sz2){
17587  double *result2;
17588  result2 = (double *) malloc(typesz2);
17589  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17590 
17591  v2->assign(result2, result2+sz2);
17592  free(result2);
17593  }
17594  else{
17595  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17596 
17597  v2->assign(result, result+sz2);
17598  }
17599 
17600  clReleaseCommandQueue (queue);
17601  clReleaseMemObject(buffer);
17602  clReleaseMemObject(buffer2);
17603  clReleaseMemObject(buffer3);
17604  clReleaseEvent(gpuExec);
17605  free(result);
17606  }
17607 
17608  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
17609  size_t sz = v->size();
17610  size_t sz2 = v2->size();
17611  size_t sz3 = v3->size();
17612  size_t typesz = sizeof(int) * sz;
17613  size_t typesz2 = sizeof(double) * sz2;
17614  size_t typesz3 = sizeof(float) * sz3;
17615  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17616  if (temp_sz > 0){
17617  if (temp_sz > 2){
17618  if (temp_sz > 3){
17619  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17620  }
17621  typesz = sizeof(int) * params->buffers_size[0];
17622  typesz2 = sizeof(double) * params->buffers_size[1];
17623  typesz3 = sizeof(float) * params->buffers_size[2];
17624  }
17625  else{
17626  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17627  }
17628  }
17629  cl_int error = 0;
17630  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17631  checkError(error);
17632  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17633  checkError(error);
17634  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
17635  checkError(error);
17636  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17637  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17638  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17639  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17640 
17641  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17642  checkError (error);
17643  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17644  checkError (error);
17645  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
17646  checkError (error);
17647 
17648  size_t size[3] = {sz, sz2, sz3};
17649  size_t work_dimension = 3;
17650 
17651  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17652  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17653  work_dimension = 1;
17654  }
17655  else if(temp_sz > 0){
17656  if (params->multi_dimensional){
17657  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17658  For default multidimensional global work size, leave the global_work_size vector empty, \
17659  and set multi_dimensional to true. Setting the global work size based on the values inside \
17660  the global_work_size vector.");
17661  }
17662  if (temp_sz == 1){
17663  size[0] = params->global_work_size[0];
17664  work_dimension = 1;
17665  }
17666  else if (temp_sz == 2){
17667  size[0] = params->global_work_size[0];
17668  size[1] = params->global_work_size[1];
17669  work_dimension = 2;
17670  }
17671  else{
17672  size[0] = params->global_work_size[0];
17673  size[1] = params->global_work_size[1];
17674  size[2] = params->global_work_size[2];
17675  if (temp_sz > 3){
17676  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17677  }
17678  }
17679  }
17680 
17681  cl_event gpuExec;
17682 
17683  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17684 
17685  clWaitForEvents(1, &gpuExec);
17686 
17687  int *result = (int *) malloc(typesz);
17688  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17689 
17690  v->assign(result, result+sz);
17691 
17692  if (typesz2 != typesz or sz != sz2){
17693  double *result2;
17694  result2 = (double *) malloc(typesz2);
17695  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17696 
17697  v2->assign(result2, result2+sz2);
17698  free(result2);
17699  }
17700  else{
17701  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17702 
17703  v2->assign(result, result+sz2);
17704  }
17705 
17706  if (typesz3 != typesz or sz != sz3){
17707  float *result3;
17708  result3 = (float *) malloc(typesz3);
17709  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
17710 
17711  v3->assign(result3, result3+sz3);
17712  free(result3);
17713  }
17714  else{
17715  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17716 
17717  v3->assign(result, result+sz3);
17718  }
17719 
17720  clReleaseCommandQueue (queue);
17721  clReleaseMemObject(buffer);
17722  clReleaseMemObject(buffer2);
17723  clReleaseMemObject(buffer3);
17724  clReleaseEvent(gpuExec);
17725  free(result);
17726  }
17727 
17728 
17729  std::vector<int> ROS_OpenCL::process(const std::vector<int> v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
17730  size_t sz = v.size();
17731  size_t sz2 = v2.size();
17732  size_t sz3 = v3.size();
17733  size_t typesz = sizeof(int) * sz;
17734  size_t typesz2 = sizeof(double) * sz2;
17735  size_t typesz3 = sizeof(double) * sz3;
17736  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17737  if (temp_sz > 0){
17738  if (temp_sz > 2){
17739  if (temp_sz > 3){
17740  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17741  }
17742  typesz = sizeof(int) * params->buffers_size[0];
17743  typesz2 = sizeof(double) * params->buffers_size[1];
17744  typesz3 = sizeof(double) * params->buffers_size[2];
17745  }
17746  else{
17747  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17748  }
17749  }
17750  cl_int error = 0;
17751  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17752  checkError(error);
17753  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17754  checkError(error);
17755  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17756  checkError(error);
17757  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17758  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17759  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17760  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17761 
17762  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
17763  checkError (error);
17764  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17765  checkError (error);
17766  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17767  checkError (error);
17768 
17769  size_t size[3] = {sz, sz2, sz3};
17770  size_t work_dimension = 3;
17771 
17772  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17773  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17774  work_dimension = 1;
17775  }
17776  else if(temp_sz > 0){
17777  if (params->multi_dimensional){
17778  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17779  For default multidimensional global work size, leave the global_work_size vector empty, \
17780  and set multi_dimensional to true. Setting the global work size based on the values inside \
17781  the global_work_size vector.");
17782  }
17783  if (temp_sz == 1){
17784  size[0] = params->global_work_size[0];
17785  work_dimension = 1;
17786  }
17787  else if (temp_sz == 2){
17788  size[0] = params->global_work_size[0];
17789  size[1] = params->global_work_size[1];
17790  work_dimension = 2;
17791  }
17792  else{
17793  size[0] = params->global_work_size[0];
17794  size[1] = params->global_work_size[1];
17795  size[2] = params->global_work_size[2];
17796  if (temp_sz > 3){
17797  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17798  }
17799  }
17800  }
17801 
17802  cl_event gpuExec;
17803 
17804  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17805 
17806  clWaitForEvents(1, &gpuExec);
17807 
17808  int *result = (int *) malloc(typesz);
17809  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17810 
17811  std::vector<int> res = std::vector<int>();
17812  res.assign(result, result+sz);
17813 
17814  clReleaseCommandQueue (queue);
17815  clReleaseMemObject(buffer);
17816  clReleaseMemObject(buffer2);
17817  clReleaseMemObject(buffer3);
17818  clReleaseEvent(gpuExec);
17819  free(result);
17820 
17821  return res;
17822  }
17823 
17824  void ROS_OpenCL::process(std::vector<int>* v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
17825  size_t sz = v->size();
17826  size_t sz2 = v2.size();
17827  size_t sz3 = v3.size();
17828  size_t typesz = sizeof(int) * sz;
17829  size_t typesz2 = sizeof(double) * sz2;
17830  size_t typesz3 = sizeof(double) * sz3;
17831  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17832  if (temp_sz > 0){
17833  if (temp_sz > 2){
17834  if (temp_sz > 3){
17835  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17836  }
17837  typesz = sizeof(int) * params->buffers_size[0];
17838  typesz2 = sizeof(double) * params->buffers_size[1];
17839  typesz3 = sizeof(double) * params->buffers_size[2];
17840  }
17841  else{
17842  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17843  }
17844  }
17845  cl_int error = 0;
17846  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17847  checkError(error);
17848  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17849  checkError(error);
17850  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17851  checkError(error);
17852  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17853  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17854  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17855  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17856 
17857  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17858  checkError (error);
17859  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17860  checkError (error);
17861  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17862  checkError (error);
17863 
17864  size_t size[3] = {sz, sz2, sz3};
17865  size_t work_dimension = 3;
17866 
17867  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17868  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17869  work_dimension = 1;
17870  }
17871  else if(temp_sz > 0){
17872  if (params->multi_dimensional){
17873  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17874  For default multidimensional global work size, leave the global_work_size vector empty, \
17875  and set multi_dimensional to true. Setting the global work size based on the values inside \
17876  the global_work_size vector.");
17877  }
17878  if (temp_sz == 1){
17879  size[0] = params->global_work_size[0];
17880  work_dimension = 1;
17881  }
17882  else if (temp_sz == 2){
17883  size[0] = params->global_work_size[0];
17884  size[1] = params->global_work_size[1];
17885  work_dimension = 2;
17886  }
17887  else{
17888  size[0] = params->global_work_size[0];
17889  size[1] = params->global_work_size[1];
17890  size[2] = params->global_work_size[2];
17891  if (temp_sz > 3){
17892  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17893  }
17894  }
17895  }
17896 
17897  cl_event gpuExec;
17898 
17899  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17900 
17901  clWaitForEvents(1, &gpuExec);
17902 
17903  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17904 
17905  clWaitForEvents(1, &gpuExec);
17906 
17907  int *result = (int *) malloc(typesz);
17908  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17909 
17910  v->assign(result, result+sz);
17911 
17912  clReleaseCommandQueue (queue);
17913  clReleaseMemObject(buffer);
17914  clReleaseMemObject(buffer2);
17915  clReleaseMemObject(buffer3);
17916  clReleaseEvent(gpuExec);
17917  free(result);
17918  }
17919 
17920  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
17921  size_t sz = v->size();
17922  size_t sz2 = v2->size();
17923  size_t sz3 = v3.size();
17924  size_t typesz = sizeof(int) * sz;
17925  size_t typesz2 = sizeof(double) * sz2;
17926  size_t typesz3 = sizeof(double) * sz3;
17927  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
17928  if (temp_sz > 0){
17929  if (temp_sz > 2){
17930  if (temp_sz > 3){
17931  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17932  }
17933  typesz = sizeof(int) * params->buffers_size[0];
17934  typesz2 = sizeof(double) * params->buffers_size[1];
17935  typesz3 = sizeof(double) * params->buffers_size[2];
17936  }
17937  else{
17938  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
17939  }
17940  }
17941  cl_int error = 0;
17942  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17943  checkError(error);
17944  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17945  checkError(error);
17946  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
17947  checkError(error);
17948  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
17949  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
17950  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
17951  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
17952 
17953  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17954  checkError (error);
17955  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17956  checkError (error);
17957  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
17958  checkError (error);
17959 
17960  size_t size[3] = {sz, sz2, sz3};
17961  size_t work_dimension = 3;
17962 
17963  temp_sz = params != NULL ? params->global_work_size.size() : 0;
17964  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
17965  work_dimension = 1;
17966  }
17967  else if(temp_sz > 0){
17968  if (params->multi_dimensional){
17969  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
17970  For default multidimensional global work size, leave the global_work_size vector empty, \
17971  and set multi_dimensional to true. Setting the global work size based on the values inside \
17972  the global_work_size vector.");
17973  }
17974  if (temp_sz == 1){
17975  size[0] = params->global_work_size[0];
17976  work_dimension = 1;
17977  }
17978  else if (temp_sz == 2){
17979  size[0] = params->global_work_size[0];
17980  size[1] = params->global_work_size[1];
17981  work_dimension = 2;
17982  }
17983  else{
17984  size[0] = params->global_work_size[0];
17985  size[1] = params->global_work_size[1];
17986  size[2] = params->global_work_size[2];
17987  if (temp_sz > 3){
17988  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
17989  }
17990  }
17991  }
17992 
17993  cl_event gpuExec;
17994 
17995  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17996 
17997  clWaitForEvents(1, &gpuExec);
17998 
17999  int *result = (int *) malloc(typesz);
18000  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18001 
18002  v->assign(result, result+sz);
18003 
18004  if (typesz2 != typesz or sz != sz2){
18005  double *result2;
18006  result2 = (double *) malloc(typesz2);
18007  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18008 
18009  v2->assign(result2, result2+sz2);
18010  free(result2);
18011  }
18012  else{
18013  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18014 
18015  v2->assign(result, result+sz2);
18016  }
18017 
18018  clReleaseCommandQueue (queue);
18019  clReleaseMemObject(buffer);
18020  clReleaseMemObject(buffer2);
18021  clReleaseMemObject(buffer3);
18022  clReleaseEvent(gpuExec);
18023  free(result);
18024  }
18025 
18026  void ROS_OpenCL::process(std::vector<int>* v, std::vector<double>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
18027  size_t sz = v->size();
18028  size_t sz2 = v2->size();
18029  size_t sz3 = v3->size();
18030  size_t typesz = sizeof(int) * sz;
18031  size_t typesz2 = sizeof(double) * sz2;
18032  size_t typesz3 = sizeof(double) * sz3;
18033  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18034  if (temp_sz > 0){
18035  if (temp_sz > 2){
18036  if (temp_sz > 3){
18037  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18038  }
18039  typesz = sizeof(int) * params->buffers_size[0];
18040  typesz2 = sizeof(double) * params->buffers_size[1];
18041  typesz3 = sizeof(double) * params->buffers_size[2];
18042  }
18043  else{
18044  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18045  }
18046  }
18047  cl_int error = 0;
18048  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18049  checkError(error);
18050  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18051  checkError(error);
18052  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18053  checkError(error);
18054  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18055  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18056  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18057  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18058 
18059  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18060  checkError (error);
18061  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18062  checkError (error);
18063  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
18064  checkError (error);
18065 
18066  size_t size[3] = {sz, sz2, sz3};
18067  size_t work_dimension = 3;
18068 
18069  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18070  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18071  work_dimension = 1;
18072  }
18073  else if(temp_sz > 0){
18074  if (params->multi_dimensional){
18075  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18076  For default multidimensional global work size, leave the global_work_size vector empty, \
18077  and set multi_dimensional to true. Setting the global work size based on the values inside \
18078  the global_work_size vector.");
18079  }
18080  if (temp_sz == 1){
18081  size[0] = params->global_work_size[0];
18082  work_dimension = 1;
18083  }
18084  else if (temp_sz == 2){
18085  size[0] = params->global_work_size[0];
18086  size[1] = params->global_work_size[1];
18087  work_dimension = 2;
18088  }
18089  else{
18090  size[0] = params->global_work_size[0];
18091  size[1] = params->global_work_size[1];
18092  size[2] = params->global_work_size[2];
18093  if (temp_sz > 3){
18094  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18095  }
18096  }
18097  }
18098 
18099  cl_event gpuExec;
18100 
18101  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18102 
18103  clWaitForEvents(1, &gpuExec);
18104 
18105  int *result = (int *) malloc(typesz);
18106  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18107 
18108  v->assign(result, result+sz);
18109 
18110  if (typesz2 != typesz or sz != sz2){
18111  double *result2;
18112  result2 = (double *) malloc(typesz2);
18113  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18114 
18115  v2->assign(result2, result2+sz2);
18116  free(result2);
18117  }
18118  else{
18119  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18120 
18121  v2->assign(result, result+sz2);
18122  }
18123 
18124  if (typesz3 != typesz or sz != sz3){
18125  double *result3;
18126  result3 = (double *) malloc(typesz3);
18127  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18128 
18129  v3->assign(result3, result3+sz3);
18130  free(result3);
18131  }
18132  else{
18133  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18134 
18135  v3->assign(result, result+sz3);
18136  }
18137 
18138  clReleaseCommandQueue (queue);
18139  clReleaseMemObject(buffer);
18140  clReleaseMemObject(buffer2);
18141  clReleaseMemObject(buffer3);
18142  clReleaseEvent(gpuExec);
18143  free(result);
18144  }
18145 
18146 
18147  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
18148  size_t sz = v.size();
18149  size_t sz2 = v2.size();
18150  size_t sz3 = v3.size();
18151  size_t typesz = sizeof(float) * sz;
18152  size_t typesz2 = sizeof(char) * sz2;
18153  size_t typesz3 = sizeof(char) * sz3;
18154  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18155  if (temp_sz > 0){
18156  if (temp_sz > 2){
18157  if (temp_sz > 3){
18158  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18159  }
18160  typesz = sizeof(float) * params->buffers_size[0];
18161  typesz2 = sizeof(char) * params->buffers_size[1];
18162  typesz3 = sizeof(char) * params->buffers_size[2];
18163  }
18164  else{
18165  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18166  }
18167  }
18168  cl_int error = 0;
18169  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18170  checkError(error);
18171  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18172  checkError(error);
18173  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18174  checkError(error);
18175  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18176  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18177  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18178  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18179 
18180  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
18181  checkError (error);
18182  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18183  checkError (error);
18184  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18185  checkError (error);
18186 
18187  size_t size[3] = {sz, sz2, sz3};
18188  size_t work_dimension = 3;
18189 
18190  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18191  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18192  work_dimension = 1;
18193  }
18194  else if(temp_sz > 0){
18195  if (params->multi_dimensional){
18196  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18197  For default multidimensional global work size, leave the global_work_size vector empty, \
18198  and set multi_dimensional to true. Setting the global work size based on the values inside \
18199  the global_work_size vector.");
18200  }
18201  if (temp_sz == 1){
18202  size[0] = params->global_work_size[0];
18203  work_dimension = 1;
18204  }
18205  else if (temp_sz == 2){
18206  size[0] = params->global_work_size[0];
18207  size[1] = params->global_work_size[1];
18208  work_dimension = 2;
18209  }
18210  else{
18211  size[0] = params->global_work_size[0];
18212  size[1] = params->global_work_size[1];
18213  size[2] = params->global_work_size[2];
18214  if (temp_sz > 3){
18215  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18216  }
18217  }
18218  }
18219 
18220  cl_event gpuExec;
18221 
18222  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18223 
18224  clWaitForEvents(1, &gpuExec);
18225 
18226  float *result = (float *) malloc(typesz);
18227  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18228 
18229  std::vector<float> res = std::vector<float>();
18230  res.assign(result, result+sz);
18231 
18232  clReleaseCommandQueue (queue);
18233  clReleaseMemObject(buffer);
18234  clReleaseMemObject(buffer2);
18235  clReleaseMemObject(buffer3);
18236  clReleaseEvent(gpuExec);
18237  free(result);
18238 
18239  return res;
18240  }
18241 
18242  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
18243  size_t sz = v->size();
18244  size_t sz2 = v2.size();
18245  size_t sz3 = v3.size();
18246  size_t typesz = sizeof(float) * sz;
18247  size_t typesz2 = sizeof(char) * sz2;
18248  size_t typesz3 = sizeof(char) * sz3;
18249  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18250  if (temp_sz > 0){
18251  if (temp_sz > 2){
18252  if (temp_sz > 3){
18253  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18254  }
18255  typesz = sizeof(float) * params->buffers_size[0];
18256  typesz2 = sizeof(char) * params->buffers_size[1];
18257  typesz3 = sizeof(char) * params->buffers_size[2];
18258  }
18259  else{
18260  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18261  }
18262  }
18263  cl_int error = 0;
18264  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18265  checkError(error);
18266  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18267  checkError(error);
18268  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18269  checkError(error);
18270  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18271  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18272  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18273  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18274 
18275  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18276  checkError (error);
18277  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18278  checkError (error);
18279  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18280  checkError (error);
18281 
18282  size_t size[3] = {sz, sz2, sz3};
18283  size_t work_dimension = 3;
18284 
18285  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18286  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18287  work_dimension = 1;
18288  }
18289  else if(temp_sz > 0){
18290  if (params->multi_dimensional){
18291  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18292  For default multidimensional global work size, leave the global_work_size vector empty, \
18293  and set multi_dimensional to true. Setting the global work size based on the values inside \
18294  the global_work_size vector.");
18295  }
18296  if (temp_sz == 1){
18297  size[0] = params->global_work_size[0];
18298  work_dimension = 1;
18299  }
18300  else if (temp_sz == 2){
18301  size[0] = params->global_work_size[0];
18302  size[1] = params->global_work_size[1];
18303  work_dimension = 2;
18304  }
18305  else{
18306  size[0] = params->global_work_size[0];
18307  size[1] = params->global_work_size[1];
18308  size[2] = params->global_work_size[2];
18309  if (temp_sz > 3){
18310  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18311  }
18312  }
18313  }
18314 
18315  cl_event gpuExec;
18316 
18317  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18318 
18319  clWaitForEvents(1, &gpuExec);
18320 
18321  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18322 
18323  clWaitForEvents(1, &gpuExec);
18324 
18325  float *result = (float *) malloc(typesz);
18326  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18327 
18328  v->assign(result, result+sz);
18329 
18330  clReleaseCommandQueue (queue);
18331  clReleaseMemObject(buffer);
18332  clReleaseMemObject(buffer2);
18333  clReleaseMemObject(buffer3);
18334  clReleaseEvent(gpuExec);
18335  free(result);
18336  }
18337 
18338  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
18339  size_t sz = v->size();
18340  size_t sz2 = v2->size();
18341  size_t sz3 = v3.size();
18342  size_t typesz = sizeof(float) * sz;
18343  size_t typesz2 = sizeof(char) * sz2;
18344  size_t typesz3 = sizeof(char) * sz3;
18345  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18346  if (temp_sz > 0){
18347  if (temp_sz > 2){
18348  if (temp_sz > 3){
18349  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18350  }
18351  typesz = sizeof(float) * params->buffers_size[0];
18352  typesz2 = sizeof(char) * params->buffers_size[1];
18353  typesz3 = sizeof(char) * params->buffers_size[2];
18354  }
18355  else{
18356  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18357  }
18358  }
18359  cl_int error = 0;
18360  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18361  checkError(error);
18362  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18363  checkError(error);
18364  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18365  checkError(error);
18366  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18367  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18368  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18369  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18370 
18371  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18372  checkError (error);
18373  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18374  checkError (error);
18375  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18376  checkError (error);
18377 
18378  size_t size[3] = {sz, sz2, sz3};
18379  size_t work_dimension = 3;
18380 
18381  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18382  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18383  work_dimension = 1;
18384  }
18385  else if(temp_sz > 0){
18386  if (params->multi_dimensional){
18387  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18388  For default multidimensional global work size, leave the global_work_size vector empty, \
18389  and set multi_dimensional to true. Setting the global work size based on the values inside \
18390  the global_work_size vector.");
18391  }
18392  if (temp_sz == 1){
18393  size[0] = params->global_work_size[0];
18394  work_dimension = 1;
18395  }
18396  else if (temp_sz == 2){
18397  size[0] = params->global_work_size[0];
18398  size[1] = params->global_work_size[1];
18399  work_dimension = 2;
18400  }
18401  else{
18402  size[0] = params->global_work_size[0];
18403  size[1] = params->global_work_size[1];
18404  size[2] = params->global_work_size[2];
18405  if (temp_sz > 3){
18406  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18407  }
18408  }
18409  }
18410 
18411  cl_event gpuExec;
18412 
18413  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18414 
18415  clWaitForEvents(1, &gpuExec);
18416 
18417  float *result = (float *) malloc(typesz);
18418  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18419 
18420  v->assign(result, result+sz);
18421 
18422  if (typesz2 != typesz or sz != sz2){
18423  char *result2;
18424  result2 = (char *) malloc(typesz2);
18425  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18426 
18427  v2->assign(result2, result2+sz2);
18428  free(result2);
18429  }
18430  else{
18431  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18432 
18433  v2->assign(result, result+sz2);
18434  }
18435 
18436  clReleaseCommandQueue (queue);
18437  clReleaseMemObject(buffer);
18438  clReleaseMemObject(buffer2);
18439  clReleaseMemObject(buffer3);
18440  clReleaseEvent(gpuExec);
18441  free(result);
18442  }
18443 
18444  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
18445  size_t sz = v->size();
18446  size_t sz2 = v2->size();
18447  size_t sz3 = v3->size();
18448  size_t typesz = sizeof(float) * sz;
18449  size_t typesz2 = sizeof(char) * sz2;
18450  size_t typesz3 = sizeof(char) * sz3;
18451  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18452  if (temp_sz > 0){
18453  if (temp_sz > 2){
18454  if (temp_sz > 3){
18455  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18456  }
18457  typesz = sizeof(float) * params->buffers_size[0];
18458  typesz2 = sizeof(char) * params->buffers_size[1];
18459  typesz3 = sizeof(char) * params->buffers_size[2];
18460  }
18461  else{
18462  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18463  }
18464  }
18465  cl_int error = 0;
18466  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18467  checkError(error);
18468  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18469  checkError(error);
18470  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18471  checkError(error);
18472  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18473  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18474  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18475  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18476 
18477  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18478  checkError (error);
18479  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18480  checkError (error);
18481  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
18482  checkError (error);
18483 
18484  size_t size[3] = {sz, sz2, sz3};
18485  size_t work_dimension = 3;
18486 
18487  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18488  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18489  work_dimension = 1;
18490  }
18491  else if(temp_sz > 0){
18492  if (params->multi_dimensional){
18493  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18494  For default multidimensional global work size, leave the global_work_size vector empty, \
18495  and set multi_dimensional to true. Setting the global work size based on the values inside \
18496  the global_work_size vector.");
18497  }
18498  if (temp_sz == 1){
18499  size[0] = params->global_work_size[0];
18500  work_dimension = 1;
18501  }
18502  else if (temp_sz == 2){
18503  size[0] = params->global_work_size[0];
18504  size[1] = params->global_work_size[1];
18505  work_dimension = 2;
18506  }
18507  else{
18508  size[0] = params->global_work_size[0];
18509  size[1] = params->global_work_size[1];
18510  size[2] = params->global_work_size[2];
18511  if (temp_sz > 3){
18512  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18513  }
18514  }
18515  }
18516 
18517  cl_event gpuExec;
18518 
18519  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18520 
18521  clWaitForEvents(1, &gpuExec);
18522 
18523  float *result = (float *) malloc(typesz);
18524  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18525 
18526  v->assign(result, result+sz);
18527 
18528  if (typesz2 != typesz or sz != sz2){
18529  char *result2;
18530  result2 = (char *) malloc(typesz2);
18531  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18532 
18533  v2->assign(result2, result2+sz2);
18534  free(result2);
18535  }
18536  else{
18537  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18538 
18539  v2->assign(result, result+sz2);
18540  }
18541 
18542  if (typesz3 != typesz or sz != sz3){
18543  char *result3;
18544  result3 = (char *) malloc(typesz3);
18545  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18546 
18547  v3->assign(result3, result3+sz3);
18548  free(result3);
18549  }
18550  else{
18551  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18552 
18553  v3->assign(result, result+sz3);
18554  }
18555 
18556  clReleaseCommandQueue (queue);
18557  clReleaseMemObject(buffer);
18558  clReleaseMemObject(buffer2);
18559  clReleaseMemObject(buffer3);
18560  clReleaseEvent(gpuExec);
18561  free(result);
18562  }
18563 
18564 
18565  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
18566  size_t sz = v.size();
18567  size_t sz2 = v2.size();
18568  size_t sz3 = v3.size();
18569  size_t typesz = sizeof(float) * sz;
18570  size_t typesz2 = sizeof(char) * sz2;
18571  size_t typesz3 = sizeof(int) * sz3;
18572  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18573  if (temp_sz > 0){
18574  if (temp_sz > 2){
18575  if (temp_sz > 3){
18576  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18577  }
18578  typesz = sizeof(float) * params->buffers_size[0];
18579  typesz2 = sizeof(char) * params->buffers_size[1];
18580  typesz3 = sizeof(int) * params->buffers_size[2];
18581  }
18582  else{
18583  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18584  }
18585  }
18586  cl_int error = 0;
18587  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18588  checkError(error);
18589  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18590  checkError(error);
18591  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18592  checkError(error);
18593  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18594  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18595  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18596  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18597 
18598  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
18599  checkError (error);
18600  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18601  checkError (error);
18602  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18603  checkError (error);
18604 
18605  size_t size[3] = {sz, sz2, sz3};
18606  size_t work_dimension = 3;
18607 
18608  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18609  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18610  work_dimension = 1;
18611  }
18612  else if(temp_sz > 0){
18613  if (params->multi_dimensional){
18614  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18615  For default multidimensional global work size, leave the global_work_size vector empty, \
18616  and set multi_dimensional to true. Setting the global work size based on the values inside \
18617  the global_work_size vector.");
18618  }
18619  if (temp_sz == 1){
18620  size[0] = params->global_work_size[0];
18621  work_dimension = 1;
18622  }
18623  else if (temp_sz == 2){
18624  size[0] = params->global_work_size[0];
18625  size[1] = params->global_work_size[1];
18626  work_dimension = 2;
18627  }
18628  else{
18629  size[0] = params->global_work_size[0];
18630  size[1] = params->global_work_size[1];
18631  size[2] = params->global_work_size[2];
18632  if (temp_sz > 3){
18633  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18634  }
18635  }
18636  }
18637 
18638  cl_event gpuExec;
18639 
18640  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18641 
18642  clWaitForEvents(1, &gpuExec);
18643 
18644  float *result = (float *) malloc(typesz);
18645  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18646 
18647  std::vector<float> res = std::vector<float>();
18648  res.assign(result, result+sz);
18649 
18650  clReleaseCommandQueue (queue);
18651  clReleaseMemObject(buffer);
18652  clReleaseMemObject(buffer2);
18653  clReleaseMemObject(buffer3);
18654  clReleaseEvent(gpuExec);
18655  free(result);
18656 
18657  return res;
18658  }
18659 
18660  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
18661  size_t sz = v->size();
18662  size_t sz2 = v2.size();
18663  size_t sz3 = v3.size();
18664  size_t typesz = sizeof(float) * sz;
18665  size_t typesz2 = sizeof(char) * sz2;
18666  size_t typesz3 = sizeof(int) * sz3;
18667  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18668  if (temp_sz > 0){
18669  if (temp_sz > 2){
18670  if (temp_sz > 3){
18671  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18672  }
18673  typesz = sizeof(float) * params->buffers_size[0];
18674  typesz2 = sizeof(char) * params->buffers_size[1];
18675  typesz3 = sizeof(int) * params->buffers_size[2];
18676  }
18677  else{
18678  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18679  }
18680  }
18681  cl_int error = 0;
18682  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18683  checkError(error);
18684  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18685  checkError(error);
18686  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18687  checkError(error);
18688  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18689  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18690  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18691  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18692 
18693  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18694  checkError (error);
18695  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18696  checkError (error);
18697  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18698  checkError (error);
18699 
18700  size_t size[3] = {sz, sz2, sz3};
18701  size_t work_dimension = 3;
18702 
18703  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18704  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18705  work_dimension = 1;
18706  }
18707  else if(temp_sz > 0){
18708  if (params->multi_dimensional){
18709  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18710  For default multidimensional global work size, leave the global_work_size vector empty, \
18711  and set multi_dimensional to true. Setting the global work size based on the values inside \
18712  the global_work_size vector.");
18713  }
18714  if (temp_sz == 1){
18715  size[0] = params->global_work_size[0];
18716  work_dimension = 1;
18717  }
18718  else if (temp_sz == 2){
18719  size[0] = params->global_work_size[0];
18720  size[1] = params->global_work_size[1];
18721  work_dimension = 2;
18722  }
18723  else{
18724  size[0] = params->global_work_size[0];
18725  size[1] = params->global_work_size[1];
18726  size[2] = params->global_work_size[2];
18727  if (temp_sz > 3){
18728  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18729  }
18730  }
18731  }
18732 
18733  cl_event gpuExec;
18734 
18735  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18736 
18737  clWaitForEvents(1, &gpuExec);
18738 
18739  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18740 
18741  clWaitForEvents(1, &gpuExec);
18742 
18743  float *result = (float *) malloc(typesz);
18744  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18745 
18746  v->assign(result, result+sz);
18747 
18748  clReleaseCommandQueue (queue);
18749  clReleaseMemObject(buffer);
18750  clReleaseMemObject(buffer2);
18751  clReleaseMemObject(buffer3);
18752  clReleaseEvent(gpuExec);
18753  free(result);
18754  }
18755 
18756  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
18757  size_t sz = v->size();
18758  size_t sz2 = v2->size();
18759  size_t sz3 = v3.size();
18760  size_t typesz = sizeof(float) * sz;
18761  size_t typesz2 = sizeof(char) * sz2;
18762  size_t typesz3 = sizeof(int) * sz3;
18763  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18764  if (temp_sz > 0){
18765  if (temp_sz > 2){
18766  if (temp_sz > 3){
18767  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18768  }
18769  typesz = sizeof(float) * params->buffers_size[0];
18770  typesz2 = sizeof(char) * params->buffers_size[1];
18771  typesz3 = sizeof(int) * params->buffers_size[2];
18772  }
18773  else{
18774  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18775  }
18776  }
18777  cl_int error = 0;
18778  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18779  checkError(error);
18780  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18781  checkError(error);
18782  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
18783  checkError(error);
18784  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18785  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18786  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18787  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18788 
18789  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18790  checkError (error);
18791  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18792  checkError (error);
18793  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18794  checkError (error);
18795 
18796  size_t size[3] = {sz, sz2, sz3};
18797  size_t work_dimension = 3;
18798 
18799  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18800  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18801  work_dimension = 1;
18802  }
18803  else if(temp_sz > 0){
18804  if (params->multi_dimensional){
18805  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18806  For default multidimensional global work size, leave the global_work_size vector empty, \
18807  and set multi_dimensional to true. Setting the global work size based on the values inside \
18808  the global_work_size vector.");
18809  }
18810  if (temp_sz == 1){
18811  size[0] = params->global_work_size[0];
18812  work_dimension = 1;
18813  }
18814  else if (temp_sz == 2){
18815  size[0] = params->global_work_size[0];
18816  size[1] = params->global_work_size[1];
18817  work_dimension = 2;
18818  }
18819  else{
18820  size[0] = params->global_work_size[0];
18821  size[1] = params->global_work_size[1];
18822  size[2] = params->global_work_size[2];
18823  if (temp_sz > 3){
18824  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18825  }
18826  }
18827  }
18828 
18829  cl_event gpuExec;
18830 
18831  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18832 
18833  clWaitForEvents(1, &gpuExec);
18834 
18835  float *result = (float *) malloc(typesz);
18836  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18837 
18838  v->assign(result, result+sz);
18839 
18840  if (typesz2 != typesz or sz != sz2){
18841  char *result2;
18842  result2 = (char *) malloc(typesz2);
18843  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18844 
18845  v2->assign(result2, result2+sz2);
18846  free(result2);
18847  }
18848  else{
18849  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18850 
18851  v2->assign(result, result+sz2);
18852  }
18853 
18854  clReleaseCommandQueue (queue);
18855  clReleaseMemObject(buffer);
18856  clReleaseMemObject(buffer2);
18857  clReleaseMemObject(buffer3);
18858  clReleaseEvent(gpuExec);
18859  free(result);
18860  }
18861 
18862  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
18863  size_t sz = v->size();
18864  size_t sz2 = v2->size();
18865  size_t sz3 = v3->size();
18866  size_t typesz = sizeof(float) * sz;
18867  size_t typesz2 = sizeof(char) * sz2;
18868  size_t typesz3 = sizeof(int) * sz3;
18869  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18870  if (temp_sz > 0){
18871  if (temp_sz > 2){
18872  if (temp_sz > 3){
18873  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18874  }
18875  typesz = sizeof(float) * params->buffers_size[0];
18876  typesz2 = sizeof(char) * params->buffers_size[1];
18877  typesz3 = sizeof(int) * params->buffers_size[2];
18878  }
18879  else{
18880  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18881  }
18882  }
18883  cl_int error = 0;
18884  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18885  checkError(error);
18886  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18887  checkError(error);
18888  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18889  checkError(error);
18890  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
18891  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
18892  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
18893  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
18894 
18895  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18896  checkError (error);
18897  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18898  checkError (error);
18899  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
18900  checkError (error);
18901 
18902  size_t size[3] = {sz, sz2, sz3};
18903  size_t work_dimension = 3;
18904 
18905  temp_sz = params != NULL ? params->global_work_size.size() : 0;
18906  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
18907  work_dimension = 1;
18908  }
18909  else if(temp_sz > 0){
18910  if (params->multi_dimensional){
18911  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
18912  For default multidimensional global work size, leave the global_work_size vector empty, \
18913  and set multi_dimensional to true. Setting the global work size based on the values inside \
18914  the global_work_size vector.");
18915  }
18916  if (temp_sz == 1){
18917  size[0] = params->global_work_size[0];
18918  work_dimension = 1;
18919  }
18920  else if (temp_sz == 2){
18921  size[0] = params->global_work_size[0];
18922  size[1] = params->global_work_size[1];
18923  work_dimension = 2;
18924  }
18925  else{
18926  size[0] = params->global_work_size[0];
18927  size[1] = params->global_work_size[1];
18928  size[2] = params->global_work_size[2];
18929  if (temp_sz > 3){
18930  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18931  }
18932  }
18933  }
18934 
18935  cl_event gpuExec;
18936 
18937  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18938 
18939  clWaitForEvents(1, &gpuExec);
18940 
18941  float *result = (float *) malloc(typesz);
18942  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18943 
18944  v->assign(result, result+sz);
18945 
18946  if (typesz2 != typesz or sz != sz2){
18947  char *result2;
18948  result2 = (char *) malloc(typesz2);
18949  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18950 
18951  v2->assign(result2, result2+sz2);
18952  free(result2);
18953  }
18954  else{
18955  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18956 
18957  v2->assign(result, result+sz2);
18958  }
18959 
18960  if (typesz3 != typesz or sz != sz3){
18961  int *result3;
18962  result3 = (int *) malloc(typesz3);
18963  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18964 
18965  v3->assign(result3, result3+sz3);
18966  free(result3);
18967  }
18968  else{
18969  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18970 
18971  v3->assign(result, result+sz3);
18972  }
18973 
18974  clReleaseCommandQueue (queue);
18975  clReleaseMemObject(buffer);
18976  clReleaseMemObject(buffer2);
18977  clReleaseMemObject(buffer3);
18978  clReleaseEvent(gpuExec);
18979  free(result);
18980  }
18981 
18982 
18983  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
18984  size_t sz = v.size();
18985  size_t sz2 = v2.size();
18986  size_t sz3 = v3.size();
18987  size_t typesz = sizeof(float) * sz;
18988  size_t typesz2 = sizeof(char) * sz2;
18989  size_t typesz3 = sizeof(float) * sz3;
18990  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
18991  if (temp_sz > 0){
18992  if (temp_sz > 2){
18993  if (temp_sz > 3){
18994  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18995  }
18996  typesz = sizeof(float) * params->buffers_size[0];
18997  typesz2 = sizeof(char) * params->buffers_size[1];
18998  typesz3 = sizeof(float) * params->buffers_size[2];
18999  }
19000  else{
19001  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19002  }
19003  }
19004  cl_int error = 0;
19005  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19006  checkError(error);
19007  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19008  checkError(error);
19009  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19010  checkError(error);
19011  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19012  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19013  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19014  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19015 
19016  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19017  checkError (error);
19018  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19019  checkError (error);
19020  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19021  checkError (error);
19022 
19023  size_t size[3] = {sz, sz2, sz3};
19024  size_t work_dimension = 3;
19025 
19026  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19027  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19028  work_dimension = 1;
19029  }
19030  else if(temp_sz > 0){
19031  if (params->multi_dimensional){
19032  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19033  For default multidimensional global work size, leave the global_work_size vector empty, \
19034  and set multi_dimensional to true. Setting the global work size based on the values inside \
19035  the global_work_size vector.");
19036  }
19037  if (temp_sz == 1){
19038  size[0] = params->global_work_size[0];
19039  work_dimension = 1;
19040  }
19041  else if (temp_sz == 2){
19042  size[0] = params->global_work_size[0];
19043  size[1] = params->global_work_size[1];
19044  work_dimension = 2;
19045  }
19046  else{
19047  size[0] = params->global_work_size[0];
19048  size[1] = params->global_work_size[1];
19049  size[2] = params->global_work_size[2];
19050  if (temp_sz > 3){
19051  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19052  }
19053  }
19054  }
19055 
19056  cl_event gpuExec;
19057 
19058  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19059 
19060  clWaitForEvents(1, &gpuExec);
19061 
19062  float *result = (float *) malloc(typesz);
19063  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19064 
19065  std::vector<float> res = std::vector<float>();
19066  res.assign(result, result+sz);
19067 
19068  clReleaseCommandQueue (queue);
19069  clReleaseMemObject(buffer);
19070  clReleaseMemObject(buffer2);
19071  clReleaseMemObject(buffer3);
19072  clReleaseEvent(gpuExec);
19073  free(result);
19074 
19075  return res;
19076  }
19077 
19078  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
19079  size_t sz = v->size();
19080  size_t sz2 = v2.size();
19081  size_t sz3 = v3.size();
19082  size_t typesz = sizeof(float) * sz;
19083  size_t typesz2 = sizeof(char) * sz2;
19084  size_t typesz3 = sizeof(float) * sz3;
19085  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19086  if (temp_sz > 0){
19087  if (temp_sz > 2){
19088  if (temp_sz > 3){
19089  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19090  }
19091  typesz = sizeof(float) * params->buffers_size[0];
19092  typesz2 = sizeof(char) * params->buffers_size[1];
19093  typesz3 = sizeof(float) * params->buffers_size[2];
19094  }
19095  else{
19096  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19097  }
19098  }
19099  cl_int error = 0;
19100  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19101  checkError(error);
19102  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19103  checkError(error);
19104  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19105  checkError(error);
19106  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19107  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19108  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19109  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19110 
19111  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19112  checkError (error);
19113  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19114  checkError (error);
19115  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19116  checkError (error);
19117 
19118  size_t size[3] = {sz, sz2, sz3};
19119  size_t work_dimension = 3;
19120 
19121  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19122  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19123  work_dimension = 1;
19124  }
19125  else if(temp_sz > 0){
19126  if (params->multi_dimensional){
19127  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19128  For default multidimensional global work size, leave the global_work_size vector empty, \
19129  and set multi_dimensional to true. Setting the global work size based on the values inside \
19130  the global_work_size vector.");
19131  }
19132  if (temp_sz == 1){
19133  size[0] = params->global_work_size[0];
19134  work_dimension = 1;
19135  }
19136  else if (temp_sz == 2){
19137  size[0] = params->global_work_size[0];
19138  size[1] = params->global_work_size[1];
19139  work_dimension = 2;
19140  }
19141  else{
19142  size[0] = params->global_work_size[0];
19143  size[1] = params->global_work_size[1];
19144  size[2] = params->global_work_size[2];
19145  if (temp_sz > 3){
19146  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19147  }
19148  }
19149  }
19150 
19151  cl_event gpuExec;
19152 
19153  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19154 
19155  clWaitForEvents(1, &gpuExec);
19156 
19157  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19158 
19159  clWaitForEvents(1, &gpuExec);
19160 
19161  float *result = (float *) malloc(typesz);
19162  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19163 
19164  v->assign(result, result+sz);
19165 
19166  clReleaseCommandQueue (queue);
19167  clReleaseMemObject(buffer);
19168  clReleaseMemObject(buffer2);
19169  clReleaseMemObject(buffer3);
19170  clReleaseEvent(gpuExec);
19171  free(result);
19172  }
19173 
19174  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
19175  size_t sz = v->size();
19176  size_t sz2 = v2->size();
19177  size_t sz3 = v3.size();
19178  size_t typesz = sizeof(float) * sz;
19179  size_t typesz2 = sizeof(char) * sz2;
19180  size_t typesz3 = sizeof(float) * sz3;
19181  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19182  if (temp_sz > 0){
19183  if (temp_sz > 2){
19184  if (temp_sz > 3){
19185  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19186  }
19187  typesz = sizeof(float) * params->buffers_size[0];
19188  typesz2 = sizeof(char) * params->buffers_size[1];
19189  typesz3 = sizeof(float) * params->buffers_size[2];
19190  }
19191  else{
19192  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19193  }
19194  }
19195  cl_int error = 0;
19196  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19197  checkError(error);
19198  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19199  checkError(error);
19200  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19201  checkError(error);
19202  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19203  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19204  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19205  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19206 
19207  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19208  checkError (error);
19209  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19210  checkError (error);
19211  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19212  checkError (error);
19213 
19214  size_t size[3] = {sz, sz2, sz3};
19215  size_t work_dimension = 3;
19216 
19217  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19218  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19219  work_dimension = 1;
19220  }
19221  else if(temp_sz > 0){
19222  if (params->multi_dimensional){
19223  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19224  For default multidimensional global work size, leave the global_work_size vector empty, \
19225  and set multi_dimensional to true. Setting the global work size based on the values inside \
19226  the global_work_size vector.");
19227  }
19228  if (temp_sz == 1){
19229  size[0] = params->global_work_size[0];
19230  work_dimension = 1;
19231  }
19232  else if (temp_sz == 2){
19233  size[0] = params->global_work_size[0];
19234  size[1] = params->global_work_size[1];
19235  work_dimension = 2;
19236  }
19237  else{
19238  size[0] = params->global_work_size[0];
19239  size[1] = params->global_work_size[1];
19240  size[2] = params->global_work_size[2];
19241  if (temp_sz > 3){
19242  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19243  }
19244  }
19245  }
19246 
19247  cl_event gpuExec;
19248 
19249  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19250 
19251  clWaitForEvents(1, &gpuExec);
19252 
19253  float *result = (float *) malloc(typesz);
19254  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19255 
19256  v->assign(result, result+sz);
19257 
19258  if (typesz2 != typesz or sz != sz2){
19259  char *result2;
19260  result2 = (char *) malloc(typesz2);
19261  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19262 
19263  v2->assign(result2, result2+sz2);
19264  free(result2);
19265  }
19266  else{
19267  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19268 
19269  v2->assign(result, result+sz2);
19270  }
19271 
19272  clReleaseCommandQueue (queue);
19273  clReleaseMemObject(buffer);
19274  clReleaseMemObject(buffer2);
19275  clReleaseMemObject(buffer3);
19276  clReleaseEvent(gpuExec);
19277  free(result);
19278  }
19279 
19280  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
19281  size_t sz = v->size();
19282  size_t sz2 = v2->size();
19283  size_t sz3 = v3->size();
19284  size_t typesz = sizeof(float) * sz;
19285  size_t typesz2 = sizeof(char) * sz2;
19286  size_t typesz3 = sizeof(float) * sz3;
19287  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19288  if (temp_sz > 0){
19289  if (temp_sz > 2){
19290  if (temp_sz > 3){
19291  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19292  }
19293  typesz = sizeof(float) * params->buffers_size[0];
19294  typesz2 = sizeof(char) * params->buffers_size[1];
19295  typesz3 = sizeof(float) * params->buffers_size[2];
19296  }
19297  else{
19298  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19299  }
19300  }
19301  cl_int error = 0;
19302  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19303  checkError(error);
19304  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19305  checkError(error);
19306  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19307  checkError(error);
19308  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19309  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19310  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19311  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19312 
19313  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19314  checkError (error);
19315  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19316  checkError (error);
19317  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
19318  checkError (error);
19319 
19320  size_t size[3] = {sz, sz2, sz3};
19321  size_t work_dimension = 3;
19322 
19323  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19324  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19325  work_dimension = 1;
19326  }
19327  else if(temp_sz > 0){
19328  if (params->multi_dimensional){
19329  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19330  For default multidimensional global work size, leave the global_work_size vector empty, \
19331  and set multi_dimensional to true. Setting the global work size based on the values inside \
19332  the global_work_size vector.");
19333  }
19334  if (temp_sz == 1){
19335  size[0] = params->global_work_size[0];
19336  work_dimension = 1;
19337  }
19338  else if (temp_sz == 2){
19339  size[0] = params->global_work_size[0];
19340  size[1] = params->global_work_size[1];
19341  work_dimension = 2;
19342  }
19343  else{
19344  size[0] = params->global_work_size[0];
19345  size[1] = params->global_work_size[1];
19346  size[2] = params->global_work_size[2];
19347  if (temp_sz > 3){
19348  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19349  }
19350  }
19351  }
19352 
19353  cl_event gpuExec;
19354 
19355  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19356 
19357  clWaitForEvents(1, &gpuExec);
19358 
19359  float *result = (float *) malloc(typesz);
19360  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19361 
19362  v->assign(result, result+sz);
19363 
19364  if (typesz2 != typesz or sz != sz2){
19365  char *result2;
19366  result2 = (char *) malloc(typesz2);
19367  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19368 
19369  v2->assign(result2, result2+sz2);
19370  free(result2);
19371  }
19372  else{
19373  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19374 
19375  v2->assign(result, result+sz2);
19376  }
19377 
19378  if (typesz3 != typesz or sz != sz3){
19379  float *result3;
19380  result3 = (float *) malloc(typesz3);
19381  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
19382 
19383  v3->assign(result3, result3+sz3);
19384  free(result3);
19385  }
19386  else{
19387  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19388 
19389  v3->assign(result, result+sz3);
19390  }
19391 
19392  clReleaseCommandQueue (queue);
19393  clReleaseMemObject(buffer);
19394  clReleaseMemObject(buffer2);
19395  clReleaseMemObject(buffer3);
19396  clReleaseEvent(gpuExec);
19397  free(result);
19398  }
19399 
19400 
19401  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
19402  size_t sz = v.size();
19403  size_t sz2 = v2.size();
19404  size_t sz3 = v3.size();
19405  size_t typesz = sizeof(float) * sz;
19406  size_t typesz2 = sizeof(char) * sz2;
19407  size_t typesz3 = sizeof(double) * sz3;
19408  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19409  if (temp_sz > 0){
19410  if (temp_sz > 2){
19411  if (temp_sz > 3){
19412  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19413  }
19414  typesz = sizeof(float) * params->buffers_size[0];
19415  typesz2 = sizeof(char) * params->buffers_size[1];
19416  typesz3 = sizeof(double) * params->buffers_size[2];
19417  }
19418  else{
19419  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19420  }
19421  }
19422  cl_int error = 0;
19423  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19424  checkError(error);
19425  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19426  checkError(error);
19427  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19428  checkError(error);
19429  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19430  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19431  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19432  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19433 
19434  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19435  checkError (error);
19436  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19437  checkError (error);
19438  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19439  checkError (error);
19440 
19441  size_t size[3] = {sz, sz2, sz3};
19442  size_t work_dimension = 3;
19443 
19444  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19445  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19446  work_dimension = 1;
19447  }
19448  else if(temp_sz > 0){
19449  if (params->multi_dimensional){
19450  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19451  For default multidimensional global work size, leave the global_work_size vector empty, \
19452  and set multi_dimensional to true. Setting the global work size based on the values inside \
19453  the global_work_size vector.");
19454  }
19455  if (temp_sz == 1){
19456  size[0] = params->global_work_size[0];
19457  work_dimension = 1;
19458  }
19459  else if (temp_sz == 2){
19460  size[0] = params->global_work_size[0];
19461  size[1] = params->global_work_size[1];
19462  work_dimension = 2;
19463  }
19464  else{
19465  size[0] = params->global_work_size[0];
19466  size[1] = params->global_work_size[1];
19467  size[2] = params->global_work_size[2];
19468  if (temp_sz > 3){
19469  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19470  }
19471  }
19472  }
19473 
19474  cl_event gpuExec;
19475 
19476  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19477 
19478  clWaitForEvents(1, &gpuExec);
19479 
19480  float *result = (float *) malloc(typesz);
19481  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19482 
19483  std::vector<float> res = std::vector<float>();
19484  res.assign(result, result+sz);
19485 
19486  clReleaseCommandQueue (queue);
19487  clReleaseMemObject(buffer);
19488  clReleaseMemObject(buffer2);
19489  clReleaseMemObject(buffer3);
19490  clReleaseEvent(gpuExec);
19491  free(result);
19492 
19493  return res;
19494  }
19495 
19496  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
19497  size_t sz = v->size();
19498  size_t sz2 = v2.size();
19499  size_t sz3 = v3.size();
19500  size_t typesz = sizeof(float) * sz;
19501  size_t typesz2 = sizeof(char) * sz2;
19502  size_t typesz3 = sizeof(double) * sz3;
19503  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19504  if (temp_sz > 0){
19505  if (temp_sz > 2){
19506  if (temp_sz > 3){
19507  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19508  }
19509  typesz = sizeof(float) * params->buffers_size[0];
19510  typesz2 = sizeof(char) * params->buffers_size[1];
19511  typesz3 = sizeof(double) * params->buffers_size[2];
19512  }
19513  else{
19514  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19515  }
19516  }
19517  cl_int error = 0;
19518  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19519  checkError(error);
19520  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19521  checkError(error);
19522  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19523  checkError(error);
19524  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19525  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19526  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19527  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19528 
19529  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19530  checkError (error);
19531  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19532  checkError (error);
19533  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19534  checkError (error);
19535 
19536  size_t size[3] = {sz, sz2, sz3};
19537  size_t work_dimension = 3;
19538 
19539  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19540  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19541  work_dimension = 1;
19542  }
19543  else if(temp_sz > 0){
19544  if (params->multi_dimensional){
19545  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19546  For default multidimensional global work size, leave the global_work_size vector empty, \
19547  and set multi_dimensional to true. Setting the global work size based on the values inside \
19548  the global_work_size vector.");
19549  }
19550  if (temp_sz == 1){
19551  size[0] = params->global_work_size[0];
19552  work_dimension = 1;
19553  }
19554  else if (temp_sz == 2){
19555  size[0] = params->global_work_size[0];
19556  size[1] = params->global_work_size[1];
19557  work_dimension = 2;
19558  }
19559  else{
19560  size[0] = params->global_work_size[0];
19561  size[1] = params->global_work_size[1];
19562  size[2] = params->global_work_size[2];
19563  if (temp_sz > 3){
19564  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19565  }
19566  }
19567  }
19568 
19569  cl_event gpuExec;
19570 
19571  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19572 
19573  clWaitForEvents(1, &gpuExec);
19574 
19575  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19576 
19577  clWaitForEvents(1, &gpuExec);
19578 
19579  float *result = (float *) malloc(typesz);
19580  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19581 
19582  v->assign(result, result+sz);
19583 
19584  clReleaseCommandQueue (queue);
19585  clReleaseMemObject(buffer);
19586  clReleaseMemObject(buffer2);
19587  clReleaseMemObject(buffer3);
19588  clReleaseEvent(gpuExec);
19589  free(result);
19590  }
19591 
19592  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
19593  size_t sz = v->size();
19594  size_t sz2 = v2->size();
19595  size_t sz3 = v3.size();
19596  size_t typesz = sizeof(float) * sz;
19597  size_t typesz2 = sizeof(char) * sz2;
19598  size_t typesz3 = sizeof(double) * sz3;
19599  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19600  if (temp_sz > 0){
19601  if (temp_sz > 2){
19602  if (temp_sz > 3){
19603  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19604  }
19605  typesz = sizeof(float) * params->buffers_size[0];
19606  typesz2 = sizeof(char) * params->buffers_size[1];
19607  typesz3 = sizeof(double) * params->buffers_size[2];
19608  }
19609  else{
19610  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19611  }
19612  }
19613  cl_int error = 0;
19614  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19615  checkError(error);
19616  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19617  checkError(error);
19618  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19619  checkError(error);
19620  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19621  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19622  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19623  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19624 
19625  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19626  checkError (error);
19627  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19628  checkError (error);
19629  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19630  checkError (error);
19631 
19632  size_t size[3] = {sz, sz2, sz3};
19633  size_t work_dimension = 3;
19634 
19635  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19636  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19637  work_dimension = 1;
19638  }
19639  else if(temp_sz > 0){
19640  if (params->multi_dimensional){
19641  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19642  For default multidimensional global work size, leave the global_work_size vector empty, \
19643  and set multi_dimensional to true. Setting the global work size based on the values inside \
19644  the global_work_size vector.");
19645  }
19646  if (temp_sz == 1){
19647  size[0] = params->global_work_size[0];
19648  work_dimension = 1;
19649  }
19650  else if (temp_sz == 2){
19651  size[0] = params->global_work_size[0];
19652  size[1] = params->global_work_size[1];
19653  work_dimension = 2;
19654  }
19655  else{
19656  size[0] = params->global_work_size[0];
19657  size[1] = params->global_work_size[1];
19658  size[2] = params->global_work_size[2];
19659  if (temp_sz > 3){
19660  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19661  }
19662  }
19663  }
19664 
19665  cl_event gpuExec;
19666 
19667  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19668 
19669  clWaitForEvents(1, &gpuExec);
19670 
19671  float *result = (float *) malloc(typesz);
19672  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19673 
19674  v->assign(result, result+sz);
19675 
19676  if (typesz2 != typesz or sz != sz2){
19677  char *result2;
19678  result2 = (char *) malloc(typesz2);
19679  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19680 
19681  v2->assign(result2, result2+sz2);
19682  free(result2);
19683  }
19684  else{
19685  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19686 
19687  v2->assign(result, result+sz2);
19688  }
19689 
19690  clReleaseCommandQueue (queue);
19691  clReleaseMemObject(buffer);
19692  clReleaseMemObject(buffer2);
19693  clReleaseMemObject(buffer3);
19694  clReleaseEvent(gpuExec);
19695  free(result);
19696  }
19697 
19698  void ROS_OpenCL::process(std::vector<float>* v, std::vector<char>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
19699  size_t sz = v->size();
19700  size_t sz2 = v2->size();
19701  size_t sz3 = v3->size();
19702  size_t typesz = sizeof(float) * sz;
19703  size_t typesz2 = sizeof(char) * sz2;
19704  size_t typesz3 = sizeof(double) * sz3;
19705  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19706  if (temp_sz > 0){
19707  if (temp_sz > 2){
19708  if (temp_sz > 3){
19709  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19710  }
19711  typesz = sizeof(float) * params->buffers_size[0];
19712  typesz2 = sizeof(char) * params->buffers_size[1];
19713  typesz3 = sizeof(double) * params->buffers_size[2];
19714  }
19715  else{
19716  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19717  }
19718  }
19719  cl_int error = 0;
19720  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19721  checkError(error);
19722  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19723  checkError(error);
19724  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19725  checkError(error);
19726  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19727  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19728  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19729  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19730 
19731  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19732  checkError (error);
19733  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19734  checkError (error);
19735  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
19736  checkError (error);
19737 
19738  size_t size[3] = {sz, sz2, sz3};
19739  size_t work_dimension = 3;
19740 
19741  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19742  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19743  work_dimension = 1;
19744  }
19745  else if(temp_sz > 0){
19746  if (params->multi_dimensional){
19747  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19748  For default multidimensional global work size, leave the global_work_size vector empty, \
19749  and set multi_dimensional to true. Setting the global work size based on the values inside \
19750  the global_work_size vector.");
19751  }
19752  if (temp_sz == 1){
19753  size[0] = params->global_work_size[0];
19754  work_dimension = 1;
19755  }
19756  else if (temp_sz == 2){
19757  size[0] = params->global_work_size[0];
19758  size[1] = params->global_work_size[1];
19759  work_dimension = 2;
19760  }
19761  else{
19762  size[0] = params->global_work_size[0];
19763  size[1] = params->global_work_size[1];
19764  size[2] = params->global_work_size[2];
19765  if (temp_sz > 3){
19766  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19767  }
19768  }
19769  }
19770 
19771  cl_event gpuExec;
19772 
19773  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19774 
19775  clWaitForEvents(1, &gpuExec);
19776 
19777  float *result = (float *) malloc(typesz);
19778  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19779 
19780  v->assign(result, result+sz);
19781 
19782  if (typesz2 != typesz or sz != sz2){
19783  char *result2;
19784  result2 = (char *) malloc(typesz2);
19785  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19786 
19787  v2->assign(result2, result2+sz2);
19788  free(result2);
19789  }
19790  else{
19791  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19792 
19793  v2->assign(result, result+sz2);
19794  }
19795 
19796  if (typesz3 != typesz or sz != sz3){
19797  double *result3;
19798  result3 = (double *) malloc(typesz3);
19799  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
19800 
19801  v3->assign(result3, result3+sz3);
19802  free(result3);
19803  }
19804  else{
19805  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19806 
19807  v3->assign(result, result+sz3);
19808  }
19809 
19810  clReleaseCommandQueue (queue);
19811  clReleaseMemObject(buffer);
19812  clReleaseMemObject(buffer2);
19813  clReleaseMemObject(buffer3);
19814  clReleaseEvent(gpuExec);
19815  free(result);
19816  }
19817 
19818 
19819  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
19820  size_t sz = v.size();
19821  size_t sz2 = v2.size();
19822  size_t sz3 = v3.size();
19823  size_t typesz = sizeof(float) * sz;
19824  size_t typesz2 = sizeof(int) * sz2;
19825  size_t typesz3 = sizeof(char) * sz3;
19826  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19827  if (temp_sz > 0){
19828  if (temp_sz > 2){
19829  if (temp_sz > 3){
19830  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19831  }
19832  typesz = sizeof(float) * params->buffers_size[0];
19833  typesz2 = sizeof(int) * params->buffers_size[1];
19834  typesz3 = sizeof(char) * params->buffers_size[2];
19835  }
19836  else{
19837  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19838  }
19839  }
19840  cl_int error = 0;
19841  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19842  checkError(error);
19843  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19844  checkError(error);
19845  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19846  checkError(error);
19847  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19848  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19849  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19850  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19851 
19852  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19853  checkError (error);
19854  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19855  checkError (error);
19856  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19857  checkError (error);
19858 
19859  size_t size[3] = {sz, sz2, sz3};
19860  size_t work_dimension = 3;
19861 
19862  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19863  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19864  work_dimension = 1;
19865  }
19866  else if(temp_sz > 0){
19867  if (params->multi_dimensional){
19868  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19869  For default multidimensional global work size, leave the global_work_size vector empty, \
19870  and set multi_dimensional to true. Setting the global work size based on the values inside \
19871  the global_work_size vector.");
19872  }
19873  if (temp_sz == 1){
19874  size[0] = params->global_work_size[0];
19875  work_dimension = 1;
19876  }
19877  else if (temp_sz == 2){
19878  size[0] = params->global_work_size[0];
19879  size[1] = params->global_work_size[1];
19880  work_dimension = 2;
19881  }
19882  else{
19883  size[0] = params->global_work_size[0];
19884  size[1] = params->global_work_size[1];
19885  size[2] = params->global_work_size[2];
19886  if (temp_sz > 3){
19887  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19888  }
19889  }
19890  }
19891 
19892  cl_event gpuExec;
19893 
19894  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19895 
19896  clWaitForEvents(1, &gpuExec);
19897 
19898  float *result = (float *) malloc(typesz);
19899  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19900 
19901  std::vector<float> res = std::vector<float>();
19902  res.assign(result, result+sz);
19903 
19904  clReleaseCommandQueue (queue);
19905  clReleaseMemObject(buffer);
19906  clReleaseMemObject(buffer2);
19907  clReleaseMemObject(buffer3);
19908  clReleaseEvent(gpuExec);
19909  free(result);
19910 
19911  return res;
19912  }
19913 
19914  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
19915  size_t sz = v->size();
19916  size_t sz2 = v2.size();
19917  size_t sz3 = v3.size();
19918  size_t typesz = sizeof(float) * sz;
19919  size_t typesz2 = sizeof(int) * sz2;
19920  size_t typesz3 = sizeof(char) * sz3;
19921  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
19922  if (temp_sz > 0){
19923  if (temp_sz > 2){
19924  if (temp_sz > 3){
19925  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19926  }
19927  typesz = sizeof(float) * params->buffers_size[0];
19928  typesz2 = sizeof(int) * params->buffers_size[1];
19929  typesz3 = sizeof(char) * params->buffers_size[2];
19930  }
19931  else{
19932  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19933  }
19934  }
19935  cl_int error = 0;
19936  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19937  checkError(error);
19938  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19939  checkError(error);
19940  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19941  checkError(error);
19942  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
19943  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
19944  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
19945  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
19946 
19947  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19948  checkError (error);
19949  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19950  checkError (error);
19951  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19952  checkError (error);
19953 
19954  size_t size[3] = {sz, sz2, sz3};
19955  size_t work_dimension = 3;
19956 
19957  temp_sz = params != NULL ? params->global_work_size.size() : 0;
19958  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
19959  work_dimension = 1;
19960  }
19961  else if(temp_sz > 0){
19962  if (params->multi_dimensional){
19963  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
19964  For default multidimensional global work size, leave the global_work_size vector empty, \
19965  and set multi_dimensional to true. Setting the global work size based on the values inside \
19966  the global_work_size vector.");
19967  }
19968  if (temp_sz == 1){
19969  size[0] = params->global_work_size[0];
19970  work_dimension = 1;
19971  }
19972  else if (temp_sz == 2){
19973  size[0] = params->global_work_size[0];
19974  size[1] = params->global_work_size[1];
19975  work_dimension = 2;
19976  }
19977  else{
19978  size[0] = params->global_work_size[0];
19979  size[1] = params->global_work_size[1];
19980  size[2] = params->global_work_size[2];
19981  if (temp_sz > 3){
19982  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19983  }
19984  }
19985  }
19986 
19987  cl_event gpuExec;
19988 
19989  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19990 
19991  clWaitForEvents(1, &gpuExec);
19992 
19993  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19994 
19995  clWaitForEvents(1, &gpuExec);
19996 
19997  float *result = (float *) malloc(typesz);
19998  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19999 
20000  v->assign(result, result+sz);
20001 
20002  clReleaseCommandQueue (queue);
20003  clReleaseMemObject(buffer);
20004  clReleaseMemObject(buffer2);
20005  clReleaseMemObject(buffer3);
20006  clReleaseEvent(gpuExec);
20007  free(result);
20008  }
20009 
20010  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
20011  size_t sz = v->size();
20012  size_t sz2 = v2->size();
20013  size_t sz3 = v3.size();
20014  size_t typesz = sizeof(float) * sz;
20015  size_t typesz2 = sizeof(int) * sz2;
20016  size_t typesz3 = sizeof(char) * sz3;
20017  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20018  if (temp_sz > 0){
20019  if (temp_sz > 2){
20020  if (temp_sz > 3){
20021  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20022  }
20023  typesz = sizeof(float) * params->buffers_size[0];
20024  typesz2 = sizeof(int) * params->buffers_size[1];
20025  typesz3 = sizeof(char) * params->buffers_size[2];
20026  }
20027  else{
20028  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20029  }
20030  }
20031  cl_int error = 0;
20032  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20033  checkError(error);
20034  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20035  checkError(error);
20036  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20037  checkError(error);
20038  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20039  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20040  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20041  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20042 
20043  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20044  checkError (error);
20045  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20046  checkError (error);
20047  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20048  checkError (error);
20049 
20050  size_t size[3] = {sz, sz2, sz3};
20051  size_t work_dimension = 3;
20052 
20053  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20054  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20055  work_dimension = 1;
20056  }
20057  else if(temp_sz > 0){
20058  if (params->multi_dimensional){
20059  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20060  For default multidimensional global work size, leave the global_work_size vector empty, \
20061  and set multi_dimensional to true. Setting the global work size based on the values inside \
20062  the global_work_size vector.");
20063  }
20064  if (temp_sz == 1){
20065  size[0] = params->global_work_size[0];
20066  work_dimension = 1;
20067  }
20068  else if (temp_sz == 2){
20069  size[0] = params->global_work_size[0];
20070  size[1] = params->global_work_size[1];
20071  work_dimension = 2;
20072  }
20073  else{
20074  size[0] = params->global_work_size[0];
20075  size[1] = params->global_work_size[1];
20076  size[2] = params->global_work_size[2];
20077  if (temp_sz > 3){
20078  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20079  }
20080  }
20081  }
20082 
20083  cl_event gpuExec;
20084 
20085  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20086 
20087  clWaitForEvents(1, &gpuExec);
20088 
20089  float *result = (float *) malloc(typesz);
20090  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20091 
20092  v->assign(result, result+sz);
20093 
20094  if (typesz2 != typesz or sz != sz2){
20095  int *result2;
20096  result2 = (int *) malloc(typesz2);
20097  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20098 
20099  v2->assign(result2, result2+sz2);
20100  free(result2);
20101  }
20102  else{
20103  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20104 
20105  v2->assign(result, result+sz2);
20106  }
20107 
20108  clReleaseCommandQueue (queue);
20109  clReleaseMemObject(buffer);
20110  clReleaseMemObject(buffer2);
20111  clReleaseMemObject(buffer3);
20112  clReleaseEvent(gpuExec);
20113  free(result);
20114  }
20115 
20116  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
20117  size_t sz = v->size();
20118  size_t sz2 = v2->size();
20119  size_t sz3 = v3->size();
20120  size_t typesz = sizeof(float) * sz;
20121  size_t typesz2 = sizeof(int) * sz2;
20122  size_t typesz3 = sizeof(char) * sz3;
20123  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20124  if (temp_sz > 0){
20125  if (temp_sz > 2){
20126  if (temp_sz > 3){
20127  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20128  }
20129  typesz = sizeof(float) * params->buffers_size[0];
20130  typesz2 = sizeof(int) * params->buffers_size[1];
20131  typesz3 = sizeof(char) * params->buffers_size[2];
20132  }
20133  else{
20134  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20135  }
20136  }
20137  cl_int error = 0;
20138  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20139  checkError(error);
20140  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20141  checkError(error);
20142  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
20143  checkError(error);
20144  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20145  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20146  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20147  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20148 
20149  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20150  checkError (error);
20151  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20152  checkError (error);
20153  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
20154  checkError (error);
20155 
20156  size_t size[3] = {sz, sz2, sz3};
20157  size_t work_dimension = 3;
20158 
20159  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20160  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20161  work_dimension = 1;
20162  }
20163  else if(temp_sz > 0){
20164  if (params->multi_dimensional){
20165  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20166  For default multidimensional global work size, leave the global_work_size vector empty, \
20167  and set multi_dimensional to true. Setting the global work size based on the values inside \
20168  the global_work_size vector.");
20169  }
20170  if (temp_sz == 1){
20171  size[0] = params->global_work_size[0];
20172  work_dimension = 1;
20173  }
20174  else if (temp_sz == 2){
20175  size[0] = params->global_work_size[0];
20176  size[1] = params->global_work_size[1];
20177  work_dimension = 2;
20178  }
20179  else{
20180  size[0] = params->global_work_size[0];
20181  size[1] = params->global_work_size[1];
20182  size[2] = params->global_work_size[2];
20183  if (temp_sz > 3){
20184  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20185  }
20186  }
20187  }
20188 
20189  cl_event gpuExec;
20190 
20191  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20192 
20193  clWaitForEvents(1, &gpuExec);
20194 
20195  float *result = (float *) malloc(typesz);
20196  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20197 
20198  v->assign(result, result+sz);
20199 
20200  if (typesz2 != typesz or sz != sz2){
20201  int *result2;
20202  result2 = (int *) malloc(typesz2);
20203  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20204 
20205  v2->assign(result2, result2+sz2);
20206  free(result2);
20207  }
20208  else{
20209  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20210 
20211  v2->assign(result, result+sz2);
20212  }
20213 
20214  if (typesz3 != typesz or sz != sz3){
20215  char *result3;
20216  result3 = (char *) malloc(typesz3);
20217  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
20218 
20219  v3->assign(result3, result3+sz3);
20220  free(result3);
20221  }
20222  else{
20223  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20224 
20225  v3->assign(result, result+sz3);
20226  }
20227 
20228  clReleaseCommandQueue (queue);
20229  clReleaseMemObject(buffer);
20230  clReleaseMemObject(buffer2);
20231  clReleaseMemObject(buffer3);
20232  clReleaseEvent(gpuExec);
20233  free(result);
20234  }
20235 
20236 
20237  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
20238  size_t sz = v.size();
20239  size_t sz2 = v2.size();
20240  size_t sz3 = v3.size();
20241  size_t typesz = sizeof(float) * sz;
20242  size_t typesz2 = sizeof(int) * sz2;
20243  size_t typesz3 = sizeof(int) * sz3;
20244  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20245  if (temp_sz > 0){
20246  if (temp_sz > 2){
20247  if (temp_sz > 3){
20248  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20249  }
20250  typesz = sizeof(float) * params->buffers_size[0];
20251  typesz2 = sizeof(int) * params->buffers_size[1];
20252  typesz3 = sizeof(int) * params->buffers_size[2];
20253  }
20254  else{
20255  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20256  }
20257  }
20258  cl_int error = 0;
20259  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20260  checkError(error);
20261  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20262  checkError(error);
20263  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20264  checkError(error);
20265  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20266  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20267  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20268  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20269 
20270  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
20271  checkError (error);
20272  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20273  checkError (error);
20274  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20275  checkError (error);
20276 
20277  size_t size[3] = {sz, sz2, sz3};
20278  size_t work_dimension = 3;
20279 
20280  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20281  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20282  work_dimension = 1;
20283  }
20284  else if(temp_sz > 0){
20285  if (params->multi_dimensional){
20286  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20287  For default multidimensional global work size, leave the global_work_size vector empty, \
20288  and set multi_dimensional to true. Setting the global work size based on the values inside \
20289  the global_work_size vector.");
20290  }
20291  if (temp_sz == 1){
20292  size[0] = params->global_work_size[0];
20293  work_dimension = 1;
20294  }
20295  else if (temp_sz == 2){
20296  size[0] = params->global_work_size[0];
20297  size[1] = params->global_work_size[1];
20298  work_dimension = 2;
20299  }
20300  else{
20301  size[0] = params->global_work_size[0];
20302  size[1] = params->global_work_size[1];
20303  size[2] = params->global_work_size[2];
20304  if (temp_sz > 3){
20305  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20306  }
20307  }
20308  }
20309 
20310  cl_event gpuExec;
20311 
20312  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20313 
20314  clWaitForEvents(1, &gpuExec);
20315 
20316  float *result = (float *) malloc(typesz);
20317  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20318 
20319  std::vector<float> res = std::vector<float>();
20320  res.assign(result, result+sz);
20321 
20322  clReleaseCommandQueue (queue);
20323  clReleaseMemObject(buffer);
20324  clReleaseMemObject(buffer2);
20325  clReleaseMemObject(buffer3);
20326  clReleaseEvent(gpuExec);
20327  free(result);
20328 
20329  return res;
20330  }
20331 
20332  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
20333  size_t sz = v->size();
20334  size_t sz2 = v2.size();
20335  size_t sz3 = v3.size();
20336  size_t typesz = sizeof(float) * sz;
20337  size_t typesz2 = sizeof(int) * sz2;
20338  size_t typesz3 = sizeof(int) * sz3;
20339  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20340  if (temp_sz > 0){
20341  if (temp_sz > 2){
20342  if (temp_sz > 3){
20343  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20344  }
20345  typesz = sizeof(float) * params->buffers_size[0];
20346  typesz2 = sizeof(int) * params->buffers_size[1];
20347  typesz3 = sizeof(int) * params->buffers_size[2];
20348  }
20349  else{
20350  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20351  }
20352  }
20353  cl_int error = 0;
20354  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20355  checkError(error);
20356  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20357  checkError(error);
20358  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20359  checkError(error);
20360  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20361  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20362  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20363  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20364 
20365  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20366  checkError (error);
20367  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20368  checkError (error);
20369  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20370  checkError (error);
20371 
20372  size_t size[3] = {sz, sz2, sz3};
20373  size_t work_dimension = 3;
20374 
20375  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20376  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20377  work_dimension = 1;
20378  }
20379  else if(temp_sz > 0){
20380  if (params->multi_dimensional){
20381  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20382  For default multidimensional global work size, leave the global_work_size vector empty, \
20383  and set multi_dimensional to true. Setting the global work size based on the values inside \
20384  the global_work_size vector.");
20385  }
20386  if (temp_sz == 1){
20387  size[0] = params->global_work_size[0];
20388  work_dimension = 1;
20389  }
20390  else if (temp_sz == 2){
20391  size[0] = params->global_work_size[0];
20392  size[1] = params->global_work_size[1];
20393  work_dimension = 2;
20394  }
20395  else{
20396  size[0] = params->global_work_size[0];
20397  size[1] = params->global_work_size[1];
20398  size[2] = params->global_work_size[2];
20399  if (temp_sz > 3){
20400  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20401  }
20402  }
20403  }
20404 
20405  cl_event gpuExec;
20406 
20407  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20408 
20409  clWaitForEvents(1, &gpuExec);
20410 
20411  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20412 
20413  clWaitForEvents(1, &gpuExec);
20414 
20415  float *result = (float *) malloc(typesz);
20416  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20417 
20418  v->assign(result, result+sz);
20419 
20420  clReleaseCommandQueue (queue);
20421  clReleaseMemObject(buffer);
20422  clReleaseMemObject(buffer2);
20423  clReleaseMemObject(buffer3);
20424  clReleaseEvent(gpuExec);
20425  free(result);
20426  }
20427 
20428  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
20429  size_t sz = v->size();
20430  size_t sz2 = v2->size();
20431  size_t sz3 = v3.size();
20432  size_t typesz = sizeof(float) * sz;
20433  size_t typesz2 = sizeof(int) * sz2;
20434  size_t typesz3 = sizeof(int) * sz3;
20435  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20436  if (temp_sz > 0){
20437  if (temp_sz > 2){
20438  if (temp_sz > 3){
20439  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20440  }
20441  typesz = sizeof(float) * params->buffers_size[0];
20442  typesz2 = sizeof(int) * params->buffers_size[1];
20443  typesz3 = sizeof(int) * params->buffers_size[2];
20444  }
20445  else{
20446  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20447  }
20448  }
20449  cl_int error = 0;
20450  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20451  checkError(error);
20452  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20453  checkError(error);
20454  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20455  checkError(error);
20456  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20457  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20458  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20459  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20460 
20461  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20462  checkError (error);
20463  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20464  checkError (error);
20465  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20466  checkError (error);
20467 
20468  size_t size[3] = {sz, sz2, sz3};
20469  size_t work_dimension = 3;
20470 
20471  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20472  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20473  work_dimension = 1;
20474  }
20475  else if(temp_sz > 0){
20476  if (params->multi_dimensional){
20477  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20478  For default multidimensional global work size, leave the global_work_size vector empty, \
20479  and set multi_dimensional to true. Setting the global work size based on the values inside \
20480  the global_work_size vector.");
20481  }
20482  if (temp_sz == 1){
20483  size[0] = params->global_work_size[0];
20484  work_dimension = 1;
20485  }
20486  else if (temp_sz == 2){
20487  size[0] = params->global_work_size[0];
20488  size[1] = params->global_work_size[1];
20489  work_dimension = 2;
20490  }
20491  else{
20492  size[0] = params->global_work_size[0];
20493  size[1] = params->global_work_size[1];
20494  size[2] = params->global_work_size[2];
20495  if (temp_sz > 3){
20496  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20497  }
20498  }
20499  }
20500 
20501  cl_event gpuExec;
20502 
20503  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20504 
20505  clWaitForEvents(1, &gpuExec);
20506 
20507  float *result = (float *) malloc(typesz);
20508  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20509 
20510  v->assign(result, result+sz);
20511 
20512  if (typesz2 != typesz or sz != sz2){
20513  int *result2;
20514  result2 = (int *) malloc(typesz2);
20515  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20516 
20517  v2->assign(result2, result2+sz2);
20518  free(result2);
20519  }
20520  else{
20521  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20522 
20523  v2->assign(result, result+sz2);
20524  }
20525 
20526  clReleaseCommandQueue (queue);
20527  clReleaseMemObject(buffer);
20528  clReleaseMemObject(buffer2);
20529  clReleaseMemObject(buffer3);
20530  clReleaseEvent(gpuExec);
20531  free(result);
20532  }
20533 
20534  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
20535  size_t sz = v->size();
20536  size_t sz2 = v2->size();
20537  size_t sz3 = v3->size();
20538  size_t typesz = sizeof(float) * sz;
20539  size_t typesz2 = sizeof(int) * sz2;
20540  size_t typesz3 = sizeof(int) * sz3;
20541  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20542  if (temp_sz > 0){
20543  if (temp_sz > 2){
20544  if (temp_sz > 3){
20545  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20546  }
20547  typesz = sizeof(float) * params->buffers_size[0];
20548  typesz2 = sizeof(int) * params->buffers_size[1];
20549  typesz3 = sizeof(int) * params->buffers_size[2];
20550  }
20551  else{
20552  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20553  }
20554  }
20555  cl_int error = 0;
20556  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20557  checkError(error);
20558  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20559  checkError(error);
20560  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
20561  checkError(error);
20562  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20563  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20564  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20565  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20566 
20567  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20568  checkError (error);
20569  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20570  checkError (error);
20571  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
20572  checkError (error);
20573 
20574  size_t size[3] = {sz, sz2, sz3};
20575  size_t work_dimension = 3;
20576 
20577  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20578  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20579  work_dimension = 1;
20580  }
20581  else if(temp_sz > 0){
20582  if (params->multi_dimensional){
20583  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20584  For default multidimensional global work size, leave the global_work_size vector empty, \
20585  and set multi_dimensional to true. Setting the global work size based on the values inside \
20586  the global_work_size vector.");
20587  }
20588  if (temp_sz == 1){
20589  size[0] = params->global_work_size[0];
20590  work_dimension = 1;
20591  }
20592  else if (temp_sz == 2){
20593  size[0] = params->global_work_size[0];
20594  size[1] = params->global_work_size[1];
20595  work_dimension = 2;
20596  }
20597  else{
20598  size[0] = params->global_work_size[0];
20599  size[1] = params->global_work_size[1];
20600  size[2] = params->global_work_size[2];
20601  if (temp_sz > 3){
20602  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20603  }
20604  }
20605  }
20606 
20607  cl_event gpuExec;
20608 
20609  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20610 
20611  clWaitForEvents(1, &gpuExec);
20612 
20613  float *result = (float *) malloc(typesz);
20614  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20615 
20616  v->assign(result, result+sz);
20617 
20618  if (typesz2 != typesz or sz != sz2){
20619  int *result2;
20620  result2 = (int *) malloc(typesz2);
20621  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20622 
20623  v2->assign(result2, result2+sz2);
20624  free(result2);
20625  }
20626  else{
20627  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20628 
20629  v2->assign(result, result+sz2);
20630  }
20631 
20632  if (typesz3 != typesz or sz != sz3){
20633  int *result3;
20634  result3 = (int *) malloc(typesz3);
20635  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
20636 
20637  v3->assign(result3, result3+sz3);
20638  free(result3);
20639  }
20640  else{
20641  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20642 
20643  v3->assign(result, result+sz3);
20644  }
20645 
20646  clReleaseCommandQueue (queue);
20647  clReleaseMemObject(buffer);
20648  clReleaseMemObject(buffer2);
20649  clReleaseMemObject(buffer3);
20650  clReleaseEvent(gpuExec);
20651  free(result);
20652  }
20653 
20654 
20655  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
20656  size_t sz = v.size();
20657  size_t sz2 = v2.size();
20658  size_t sz3 = v3.size();
20659  size_t typesz = sizeof(float) * sz;
20660  size_t typesz2 = sizeof(int) * sz2;
20661  size_t typesz3 = sizeof(float) * sz3;
20662  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20663  if (temp_sz > 0){
20664  if (temp_sz > 2){
20665  if (temp_sz > 3){
20666  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20667  }
20668  typesz = sizeof(float) * params->buffers_size[0];
20669  typesz2 = sizeof(int) * params->buffers_size[1];
20670  typesz3 = sizeof(float) * params->buffers_size[2];
20671  }
20672  else{
20673  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20674  }
20675  }
20676  cl_int error = 0;
20677  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20678  checkError(error);
20679  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20680  checkError(error);
20681  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20682  checkError(error);
20683  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20684  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20685  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20686  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20687 
20688  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
20689  checkError (error);
20690  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20691  checkError (error);
20692  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20693  checkError (error);
20694 
20695  size_t size[3] = {sz, sz2, sz3};
20696  size_t work_dimension = 3;
20697 
20698  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20699  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20700  work_dimension = 1;
20701  }
20702  else if(temp_sz > 0){
20703  if (params->multi_dimensional){
20704  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20705  For default multidimensional global work size, leave the global_work_size vector empty, \
20706  and set multi_dimensional to true. Setting the global work size based on the values inside \
20707  the global_work_size vector.");
20708  }
20709  if (temp_sz == 1){
20710  size[0] = params->global_work_size[0];
20711  work_dimension = 1;
20712  }
20713  else if (temp_sz == 2){
20714  size[0] = params->global_work_size[0];
20715  size[1] = params->global_work_size[1];
20716  work_dimension = 2;
20717  }
20718  else{
20719  size[0] = params->global_work_size[0];
20720  size[1] = params->global_work_size[1];
20721  size[2] = params->global_work_size[2];
20722  if (temp_sz > 3){
20723  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20724  }
20725  }
20726  }
20727 
20728  cl_event gpuExec;
20729 
20730  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20731 
20732  clWaitForEvents(1, &gpuExec);
20733 
20734  float *result = (float *) malloc(typesz);
20735  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20736 
20737  std::vector<float> res = std::vector<float>();
20738  res.assign(result, result+sz);
20739 
20740  clReleaseCommandQueue (queue);
20741  clReleaseMemObject(buffer);
20742  clReleaseMemObject(buffer2);
20743  clReleaseMemObject(buffer3);
20744  clReleaseEvent(gpuExec);
20745  free(result);
20746 
20747  return res;
20748  }
20749 
20750  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
20751  size_t sz = v->size();
20752  size_t sz2 = v2.size();
20753  size_t sz3 = v3.size();
20754  size_t typesz = sizeof(float) * sz;
20755  size_t typesz2 = sizeof(int) * sz2;
20756  size_t typesz3 = sizeof(float) * sz3;
20757  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20758  if (temp_sz > 0){
20759  if (temp_sz > 2){
20760  if (temp_sz > 3){
20761  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20762  }
20763  typesz = sizeof(float) * params->buffers_size[0];
20764  typesz2 = sizeof(int) * params->buffers_size[1];
20765  typesz3 = sizeof(float) * params->buffers_size[2];
20766  }
20767  else{
20768  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20769  }
20770  }
20771  cl_int error = 0;
20772  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20773  checkError(error);
20774  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20775  checkError(error);
20776  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20777  checkError(error);
20778  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20779  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20780  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20781  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20782 
20783  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20784  checkError (error);
20785  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20786  checkError (error);
20787  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20788  checkError (error);
20789 
20790  size_t size[3] = {sz, sz2, sz3};
20791  size_t work_dimension = 3;
20792 
20793  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20794  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20795  work_dimension = 1;
20796  }
20797  else if(temp_sz > 0){
20798  if (params->multi_dimensional){
20799  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20800  For default multidimensional global work size, leave the global_work_size vector empty, \
20801  and set multi_dimensional to true. Setting the global work size based on the values inside \
20802  the global_work_size vector.");
20803  }
20804  if (temp_sz == 1){
20805  size[0] = params->global_work_size[0];
20806  work_dimension = 1;
20807  }
20808  else if (temp_sz == 2){
20809  size[0] = params->global_work_size[0];
20810  size[1] = params->global_work_size[1];
20811  work_dimension = 2;
20812  }
20813  else{
20814  size[0] = params->global_work_size[0];
20815  size[1] = params->global_work_size[1];
20816  size[2] = params->global_work_size[2];
20817  if (temp_sz > 3){
20818  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20819  }
20820  }
20821  }
20822 
20823  cl_event gpuExec;
20824 
20825  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20826 
20827  clWaitForEvents(1, &gpuExec);
20828 
20829  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20830 
20831  clWaitForEvents(1, &gpuExec);
20832 
20833  float *result = (float *) malloc(typesz);
20834  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20835 
20836  v->assign(result, result+sz);
20837 
20838  clReleaseCommandQueue (queue);
20839  clReleaseMemObject(buffer);
20840  clReleaseMemObject(buffer2);
20841  clReleaseMemObject(buffer3);
20842  clReleaseEvent(gpuExec);
20843  free(result);
20844  }
20845 
20846  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
20847  size_t sz = v->size();
20848  size_t sz2 = v2->size();
20849  size_t sz3 = v3.size();
20850  size_t typesz = sizeof(float) * sz;
20851  size_t typesz2 = sizeof(int) * sz2;
20852  size_t typesz3 = sizeof(float) * sz3;
20853  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20854  if (temp_sz > 0){
20855  if (temp_sz > 2){
20856  if (temp_sz > 3){
20857  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20858  }
20859  typesz = sizeof(float) * params->buffers_size[0];
20860  typesz2 = sizeof(int) * params->buffers_size[1];
20861  typesz3 = sizeof(float) * params->buffers_size[2];
20862  }
20863  else{
20864  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20865  }
20866  }
20867  cl_int error = 0;
20868  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20869  checkError(error);
20870  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20871  checkError(error);
20872  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20873  checkError(error);
20874  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20875  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20876  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20877  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20878 
20879  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20880  checkError (error);
20881  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20882  checkError (error);
20883  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20884  checkError (error);
20885 
20886  size_t size[3] = {sz, sz2, sz3};
20887  size_t work_dimension = 3;
20888 
20889  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20890  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20891  work_dimension = 1;
20892  }
20893  else if(temp_sz > 0){
20894  if (params->multi_dimensional){
20895  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
20896  For default multidimensional global work size, leave the global_work_size vector empty, \
20897  and set multi_dimensional to true. Setting the global work size based on the values inside \
20898  the global_work_size vector.");
20899  }
20900  if (temp_sz == 1){
20901  size[0] = params->global_work_size[0];
20902  work_dimension = 1;
20903  }
20904  else if (temp_sz == 2){
20905  size[0] = params->global_work_size[0];
20906  size[1] = params->global_work_size[1];
20907  work_dimension = 2;
20908  }
20909  else{
20910  size[0] = params->global_work_size[0];
20911  size[1] = params->global_work_size[1];
20912  size[2] = params->global_work_size[2];
20913  if (temp_sz > 3){
20914  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20915  }
20916  }
20917  }
20918 
20919  cl_event gpuExec;
20920 
20921  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20922 
20923  clWaitForEvents(1, &gpuExec);
20924 
20925  float *result = (float *) malloc(typesz);
20926  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20927 
20928  v->assign(result, result+sz);
20929 
20930  if (typesz2 != typesz or sz != sz2){
20931  int *result2;
20932  result2 = (int *) malloc(typesz2);
20933  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20934 
20935  v2->assign(result2, result2+sz2);
20936  free(result2);
20937  }
20938  else{
20939  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20940 
20941  v2->assign(result, result+sz2);
20942  }
20943 
20944  clReleaseCommandQueue (queue);
20945  clReleaseMemObject(buffer);
20946  clReleaseMemObject(buffer2);
20947  clReleaseMemObject(buffer3);
20948  clReleaseEvent(gpuExec);
20949  free(result);
20950  }
20951 
20952  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
20953  size_t sz = v->size();
20954  size_t sz2 = v2->size();
20955  size_t sz3 = v3->size();
20956  size_t typesz = sizeof(float) * sz;
20957  size_t typesz2 = sizeof(int) * sz2;
20958  size_t typesz3 = sizeof(float) * sz3;
20959  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
20960  if (temp_sz > 0){
20961  if (temp_sz > 2){
20962  if (temp_sz > 3){
20963  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20964  }
20965  typesz = sizeof(float) * params->buffers_size[0];
20966  typesz2 = sizeof(int) * params->buffers_size[1];
20967  typesz3 = sizeof(float) * params->buffers_size[2];
20968  }
20969  else{
20970  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20971  }
20972  }
20973  cl_int error = 0;
20974  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20975  checkError(error);
20976  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20977  checkError(error);
20978  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
20979  checkError(error);
20980  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
20981  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
20982  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
20983  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
20984 
20985  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20986  checkError (error);
20987  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20988  checkError (error);
20989  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
20990  checkError (error);
20991 
20992  size_t size[3] = {sz, sz2, sz3};
20993  size_t work_dimension = 3;
20994 
20995  temp_sz = params != NULL ? params->global_work_size.size() : 0;
20996  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
20997  work_dimension = 1;
20998  }
20999  else if(temp_sz > 0){
21000  if (params->multi_dimensional){
21001  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21002  For default multidimensional global work size, leave the global_work_size vector empty, \
21003  and set multi_dimensional to true. Setting the global work size based on the values inside \
21004  the global_work_size vector.");
21005  }
21006  if (temp_sz == 1){
21007  size[0] = params->global_work_size[0];
21008  work_dimension = 1;
21009  }
21010  else if (temp_sz == 2){
21011  size[0] = params->global_work_size[0];
21012  size[1] = params->global_work_size[1];
21013  work_dimension = 2;
21014  }
21015  else{
21016  size[0] = params->global_work_size[0];
21017  size[1] = params->global_work_size[1];
21018  size[2] = params->global_work_size[2];
21019  if (temp_sz > 3){
21020  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21021  }
21022  }
21023  }
21024 
21025  cl_event gpuExec;
21026 
21027  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21028 
21029  clWaitForEvents(1, &gpuExec);
21030 
21031  float *result = (float *) malloc(typesz);
21032  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21033 
21034  v->assign(result, result+sz);
21035 
21036  if (typesz2 != typesz or sz != sz2){
21037  int *result2;
21038  result2 = (int *) malloc(typesz2);
21039  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21040 
21041  v2->assign(result2, result2+sz2);
21042  free(result2);
21043  }
21044  else{
21045  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21046 
21047  v2->assign(result, result+sz2);
21048  }
21049 
21050  if (typesz3 != typesz or sz != sz3){
21051  float *result3;
21052  result3 = (float *) malloc(typesz3);
21053  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21054 
21055  v3->assign(result3, result3+sz3);
21056  free(result3);
21057  }
21058  else{
21059  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21060 
21061  v3->assign(result, result+sz3);
21062  }
21063 
21064  clReleaseCommandQueue (queue);
21065  clReleaseMemObject(buffer);
21066  clReleaseMemObject(buffer2);
21067  clReleaseMemObject(buffer3);
21068  clReleaseEvent(gpuExec);
21069  free(result);
21070  }
21071 
21072 
21073  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
21074  size_t sz = v.size();
21075  size_t sz2 = v2.size();
21076  size_t sz3 = v3.size();
21077  size_t typesz = sizeof(float) * sz;
21078  size_t typesz2 = sizeof(int) * sz2;
21079  size_t typesz3 = sizeof(double) * sz3;
21080  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21081  if (temp_sz > 0){
21082  if (temp_sz > 2){
21083  if (temp_sz > 3){
21084  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21085  }
21086  typesz = sizeof(float) * params->buffers_size[0];
21087  typesz2 = sizeof(int) * params->buffers_size[1];
21088  typesz3 = sizeof(double) * params->buffers_size[2];
21089  }
21090  else{
21091  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21092  }
21093  }
21094  cl_int error = 0;
21095  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21096  checkError(error);
21097  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21098  checkError(error);
21099  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21100  checkError(error);
21101  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21102  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21103  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21104  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21105 
21106  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21107  checkError (error);
21108  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21109  checkError (error);
21110  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21111  checkError (error);
21112 
21113  size_t size[3] = {sz, sz2, sz3};
21114  size_t work_dimension = 3;
21115 
21116  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21117  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21118  work_dimension = 1;
21119  }
21120  else if(temp_sz > 0){
21121  if (params->multi_dimensional){
21122  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21123  For default multidimensional global work size, leave the global_work_size vector empty, \
21124  and set multi_dimensional to true. Setting the global work size based on the values inside \
21125  the global_work_size vector.");
21126  }
21127  if (temp_sz == 1){
21128  size[0] = params->global_work_size[0];
21129  work_dimension = 1;
21130  }
21131  else if (temp_sz == 2){
21132  size[0] = params->global_work_size[0];
21133  size[1] = params->global_work_size[1];
21134  work_dimension = 2;
21135  }
21136  else{
21137  size[0] = params->global_work_size[0];
21138  size[1] = params->global_work_size[1];
21139  size[2] = params->global_work_size[2];
21140  if (temp_sz > 3){
21141  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21142  }
21143  }
21144  }
21145 
21146  cl_event gpuExec;
21147 
21148  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21149 
21150  clWaitForEvents(1, &gpuExec);
21151 
21152  float *result = (float *) malloc(typesz);
21153  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21154 
21155  std::vector<float> res = std::vector<float>();
21156  res.assign(result, result+sz);
21157 
21158  clReleaseCommandQueue (queue);
21159  clReleaseMemObject(buffer);
21160  clReleaseMemObject(buffer2);
21161  clReleaseMemObject(buffer3);
21162  clReleaseEvent(gpuExec);
21163  free(result);
21164 
21165  return res;
21166  }
21167 
21168  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
21169  size_t sz = v->size();
21170  size_t sz2 = v2.size();
21171  size_t sz3 = v3.size();
21172  size_t typesz = sizeof(float) * sz;
21173  size_t typesz2 = sizeof(int) * sz2;
21174  size_t typesz3 = sizeof(double) * sz3;
21175  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21176  if (temp_sz > 0){
21177  if (temp_sz > 2){
21178  if (temp_sz > 3){
21179  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21180  }
21181  typesz = sizeof(float) * params->buffers_size[0];
21182  typesz2 = sizeof(int) * params->buffers_size[1];
21183  typesz3 = sizeof(double) * params->buffers_size[2];
21184  }
21185  else{
21186  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21187  }
21188  }
21189  cl_int error = 0;
21190  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21191  checkError(error);
21192  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21193  checkError(error);
21194  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21195  checkError(error);
21196  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21197  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21198  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21199  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21200 
21201  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21202  checkError (error);
21203  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21204  checkError (error);
21205  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21206  checkError (error);
21207 
21208  size_t size[3] = {sz, sz2, sz3};
21209  size_t work_dimension = 3;
21210 
21211  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21212  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21213  work_dimension = 1;
21214  }
21215  else if(temp_sz > 0){
21216  if (params->multi_dimensional){
21217  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21218  For default multidimensional global work size, leave the global_work_size vector empty, \
21219  and set multi_dimensional to true. Setting the global work size based on the values inside \
21220  the global_work_size vector.");
21221  }
21222  if (temp_sz == 1){
21223  size[0] = params->global_work_size[0];
21224  work_dimension = 1;
21225  }
21226  else if (temp_sz == 2){
21227  size[0] = params->global_work_size[0];
21228  size[1] = params->global_work_size[1];
21229  work_dimension = 2;
21230  }
21231  else{
21232  size[0] = params->global_work_size[0];
21233  size[1] = params->global_work_size[1];
21234  size[2] = params->global_work_size[2];
21235  if (temp_sz > 3){
21236  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21237  }
21238  }
21239  }
21240 
21241  cl_event gpuExec;
21242 
21243  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21244 
21245  clWaitForEvents(1, &gpuExec);
21246 
21247  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21248 
21249  clWaitForEvents(1, &gpuExec);
21250 
21251  float *result = (float *) malloc(typesz);
21252  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21253 
21254  v->assign(result, result+sz);
21255 
21256  clReleaseCommandQueue (queue);
21257  clReleaseMemObject(buffer);
21258  clReleaseMemObject(buffer2);
21259  clReleaseMemObject(buffer3);
21260  clReleaseEvent(gpuExec);
21261  free(result);
21262  }
21263 
21264  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
21265  size_t sz = v->size();
21266  size_t sz2 = v2->size();
21267  size_t sz3 = v3.size();
21268  size_t typesz = sizeof(float) * sz;
21269  size_t typesz2 = sizeof(int) * sz2;
21270  size_t typesz3 = sizeof(double) * sz3;
21271  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21272  if (temp_sz > 0){
21273  if (temp_sz > 2){
21274  if (temp_sz > 3){
21275  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21276  }
21277  typesz = sizeof(float) * params->buffers_size[0];
21278  typesz2 = sizeof(int) * params->buffers_size[1];
21279  typesz3 = sizeof(double) * params->buffers_size[2];
21280  }
21281  else{
21282  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21283  }
21284  }
21285  cl_int error = 0;
21286  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21287  checkError(error);
21288  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21289  checkError(error);
21290  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21291  checkError(error);
21292  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21293  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21294  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21295  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21296 
21297  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21298  checkError (error);
21299  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21300  checkError (error);
21301  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21302  checkError (error);
21303 
21304  size_t size[3] = {sz, sz2, sz3};
21305  size_t work_dimension = 3;
21306 
21307  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21308  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21309  work_dimension = 1;
21310  }
21311  else if(temp_sz > 0){
21312  if (params->multi_dimensional){
21313  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21314  For default multidimensional global work size, leave the global_work_size vector empty, \
21315  and set multi_dimensional to true. Setting the global work size based on the values inside \
21316  the global_work_size vector.");
21317  }
21318  if (temp_sz == 1){
21319  size[0] = params->global_work_size[0];
21320  work_dimension = 1;
21321  }
21322  else if (temp_sz == 2){
21323  size[0] = params->global_work_size[0];
21324  size[1] = params->global_work_size[1];
21325  work_dimension = 2;
21326  }
21327  else{
21328  size[0] = params->global_work_size[0];
21329  size[1] = params->global_work_size[1];
21330  size[2] = params->global_work_size[2];
21331  if (temp_sz > 3){
21332  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21333  }
21334  }
21335  }
21336 
21337  cl_event gpuExec;
21338 
21339  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21340 
21341  clWaitForEvents(1, &gpuExec);
21342 
21343  float *result = (float *) malloc(typesz);
21344  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21345 
21346  v->assign(result, result+sz);
21347 
21348  if (typesz2 != typesz or sz != sz2){
21349  int *result2;
21350  result2 = (int *) malloc(typesz2);
21351  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21352 
21353  v2->assign(result2, result2+sz2);
21354  free(result2);
21355  }
21356  else{
21357  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21358 
21359  v2->assign(result, result+sz2);
21360  }
21361 
21362  clReleaseCommandQueue (queue);
21363  clReleaseMemObject(buffer);
21364  clReleaseMemObject(buffer2);
21365  clReleaseMemObject(buffer3);
21366  clReleaseEvent(gpuExec);
21367  free(result);
21368  }
21369 
21370  void ROS_OpenCL::process(std::vector<float>* v, std::vector<int>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
21371  size_t sz = v->size();
21372  size_t sz2 = v2->size();
21373  size_t sz3 = v3->size();
21374  size_t typesz = sizeof(float) * sz;
21375  size_t typesz2 = sizeof(int) * sz2;
21376  size_t typesz3 = sizeof(double) * sz3;
21377  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21378  if (temp_sz > 0){
21379  if (temp_sz > 2){
21380  if (temp_sz > 3){
21381  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21382  }
21383  typesz = sizeof(float) * params->buffers_size[0];
21384  typesz2 = sizeof(int) * params->buffers_size[1];
21385  typesz3 = sizeof(double) * params->buffers_size[2];
21386  }
21387  else{
21388  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21389  }
21390  }
21391  cl_int error = 0;
21392  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21393  checkError(error);
21394  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21395  checkError(error);
21396  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
21397  checkError(error);
21398  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21399  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21400  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21401  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21402 
21403  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21404  checkError (error);
21405  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21406  checkError (error);
21407  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
21408  checkError (error);
21409 
21410  size_t size[3] = {sz, sz2, sz3};
21411  size_t work_dimension = 3;
21412 
21413  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21414  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21415  work_dimension = 1;
21416  }
21417  else if(temp_sz > 0){
21418  if (params->multi_dimensional){
21419  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21420  For default multidimensional global work size, leave the global_work_size vector empty, \
21421  and set multi_dimensional to true. Setting the global work size based on the values inside \
21422  the global_work_size vector.");
21423  }
21424  if (temp_sz == 1){
21425  size[0] = params->global_work_size[0];
21426  work_dimension = 1;
21427  }
21428  else if (temp_sz == 2){
21429  size[0] = params->global_work_size[0];
21430  size[1] = params->global_work_size[1];
21431  work_dimension = 2;
21432  }
21433  else{
21434  size[0] = params->global_work_size[0];
21435  size[1] = params->global_work_size[1];
21436  size[2] = params->global_work_size[2];
21437  if (temp_sz > 3){
21438  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21439  }
21440  }
21441  }
21442 
21443  cl_event gpuExec;
21444 
21445  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21446 
21447  clWaitForEvents(1, &gpuExec);
21448 
21449  float *result = (float *) malloc(typesz);
21450  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21451 
21452  v->assign(result, result+sz);
21453 
21454  if (typesz2 != typesz or sz != sz2){
21455  int *result2;
21456  result2 = (int *) malloc(typesz2);
21457  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21458 
21459  v2->assign(result2, result2+sz2);
21460  free(result2);
21461  }
21462  else{
21463  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21464 
21465  v2->assign(result, result+sz2);
21466  }
21467 
21468  if (typesz3 != typesz or sz != sz3){
21469  double *result3;
21470  result3 = (double *) malloc(typesz3);
21471  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21472 
21473  v3->assign(result3, result3+sz3);
21474  free(result3);
21475  }
21476  else{
21477  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21478 
21479  v3->assign(result, result+sz3);
21480  }
21481 
21482  clReleaseCommandQueue (queue);
21483  clReleaseMemObject(buffer);
21484  clReleaseMemObject(buffer2);
21485  clReleaseMemObject(buffer3);
21486  clReleaseEvent(gpuExec);
21487  free(result);
21488  }
21489 
21490 
21491  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
21492  size_t sz = v.size();
21493  size_t sz2 = v2.size();
21494  size_t sz3 = v3.size();
21495  size_t typesz = sizeof(float) * sz;
21496  size_t typesz2 = sizeof(float) * sz2;
21497  size_t typesz3 = sizeof(char) * sz3;
21498  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21499  if (temp_sz > 0){
21500  if (temp_sz > 2){
21501  if (temp_sz > 3){
21502  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21503  }
21504  typesz = sizeof(float) * params->buffers_size[0];
21505  typesz2 = sizeof(float) * params->buffers_size[1];
21506  typesz3 = sizeof(char) * params->buffers_size[2];
21507  }
21508  else{
21509  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21510  }
21511  }
21512  cl_int error = 0;
21513  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21514  checkError(error);
21515  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21516  checkError(error);
21517  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21518  checkError(error);
21519  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21520  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21521  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21522  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21523 
21524  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21525  checkError (error);
21526  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21527  checkError (error);
21528  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21529  checkError (error);
21530 
21531  size_t size[3] = {sz, sz2, sz3};
21532  size_t work_dimension = 3;
21533 
21534  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21535  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21536  work_dimension = 1;
21537  }
21538  else if(temp_sz > 0){
21539  if (params->multi_dimensional){
21540  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21541  For default multidimensional global work size, leave the global_work_size vector empty, \
21542  and set multi_dimensional to true. Setting the global work size based on the values inside \
21543  the global_work_size vector.");
21544  }
21545  if (temp_sz == 1){
21546  size[0] = params->global_work_size[0];
21547  work_dimension = 1;
21548  }
21549  else if (temp_sz == 2){
21550  size[0] = params->global_work_size[0];
21551  size[1] = params->global_work_size[1];
21552  work_dimension = 2;
21553  }
21554  else{
21555  size[0] = params->global_work_size[0];
21556  size[1] = params->global_work_size[1];
21557  size[2] = params->global_work_size[2];
21558  if (temp_sz > 3){
21559  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21560  }
21561  }
21562  }
21563 
21564  cl_event gpuExec;
21565 
21566  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21567 
21568  clWaitForEvents(1, &gpuExec);
21569 
21570  float *result = (float *) malloc(typesz);
21571  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21572 
21573  std::vector<float> res = std::vector<float>();
21574  res.assign(result, result+sz);
21575 
21576  clReleaseCommandQueue (queue);
21577  clReleaseMemObject(buffer);
21578  clReleaseMemObject(buffer2);
21579  clReleaseMemObject(buffer3);
21580  clReleaseEvent(gpuExec);
21581  free(result);
21582 
21583  return res;
21584  }
21585 
21586  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
21587  size_t sz = v->size();
21588  size_t sz2 = v2.size();
21589  size_t sz3 = v3.size();
21590  size_t typesz = sizeof(float) * sz;
21591  size_t typesz2 = sizeof(float) * sz2;
21592  size_t typesz3 = sizeof(char) * sz3;
21593  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21594  if (temp_sz > 0){
21595  if (temp_sz > 2){
21596  if (temp_sz > 3){
21597  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21598  }
21599  typesz = sizeof(float) * params->buffers_size[0];
21600  typesz2 = sizeof(float) * params->buffers_size[1];
21601  typesz3 = sizeof(char) * params->buffers_size[2];
21602  }
21603  else{
21604  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21605  }
21606  }
21607  cl_int error = 0;
21608  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21609  checkError(error);
21610  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21611  checkError(error);
21612  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21613  checkError(error);
21614  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21615  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21616  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21617  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21618 
21619  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21620  checkError (error);
21621  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21622  checkError (error);
21623  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21624  checkError (error);
21625 
21626  size_t size[3] = {sz, sz2, sz3};
21627  size_t work_dimension = 3;
21628 
21629  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21630  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21631  work_dimension = 1;
21632  }
21633  else if(temp_sz > 0){
21634  if (params->multi_dimensional){
21635  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21636  For default multidimensional global work size, leave the global_work_size vector empty, \
21637  and set multi_dimensional to true. Setting the global work size based on the values inside \
21638  the global_work_size vector.");
21639  }
21640  if (temp_sz == 1){
21641  size[0] = params->global_work_size[0];
21642  work_dimension = 1;
21643  }
21644  else if (temp_sz == 2){
21645  size[0] = params->global_work_size[0];
21646  size[1] = params->global_work_size[1];
21647  work_dimension = 2;
21648  }
21649  else{
21650  size[0] = params->global_work_size[0];
21651  size[1] = params->global_work_size[1];
21652  size[2] = params->global_work_size[2];
21653  if (temp_sz > 3){
21654  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21655  }
21656  }
21657  }
21658 
21659  cl_event gpuExec;
21660 
21661  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21662 
21663  clWaitForEvents(1, &gpuExec);
21664 
21665  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21666 
21667  clWaitForEvents(1, &gpuExec);
21668 
21669  float *result = (float *) malloc(typesz);
21670  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21671 
21672  v->assign(result, result+sz);
21673 
21674  clReleaseCommandQueue (queue);
21675  clReleaseMemObject(buffer);
21676  clReleaseMemObject(buffer2);
21677  clReleaseMemObject(buffer3);
21678  clReleaseEvent(gpuExec);
21679  free(result);
21680  }
21681 
21682  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
21683  size_t sz = v->size();
21684  size_t sz2 = v2->size();
21685  size_t sz3 = v3.size();
21686  size_t typesz = sizeof(float) * sz;
21687  size_t typesz2 = sizeof(float) * sz2;
21688  size_t typesz3 = sizeof(char) * sz3;
21689  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21690  if (temp_sz > 0){
21691  if (temp_sz > 2){
21692  if (temp_sz > 3){
21693  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21694  }
21695  typesz = sizeof(float) * params->buffers_size[0];
21696  typesz2 = sizeof(float) * params->buffers_size[1];
21697  typesz3 = sizeof(char) * params->buffers_size[2];
21698  }
21699  else{
21700  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21701  }
21702  }
21703  cl_int error = 0;
21704  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21705  checkError(error);
21706  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21707  checkError(error);
21708  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21709  checkError(error);
21710  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21711  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21712  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21713  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21714 
21715  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21716  checkError (error);
21717  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21718  checkError (error);
21719  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21720  checkError (error);
21721 
21722  size_t size[3] = {sz, sz2, sz3};
21723  size_t work_dimension = 3;
21724 
21725  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21726  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21727  work_dimension = 1;
21728  }
21729  else if(temp_sz > 0){
21730  if (params->multi_dimensional){
21731  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21732  For default multidimensional global work size, leave the global_work_size vector empty, \
21733  and set multi_dimensional to true. Setting the global work size based on the values inside \
21734  the global_work_size vector.");
21735  }
21736  if (temp_sz == 1){
21737  size[0] = params->global_work_size[0];
21738  work_dimension = 1;
21739  }
21740  else if (temp_sz == 2){
21741  size[0] = params->global_work_size[0];
21742  size[1] = params->global_work_size[1];
21743  work_dimension = 2;
21744  }
21745  else{
21746  size[0] = params->global_work_size[0];
21747  size[1] = params->global_work_size[1];
21748  size[2] = params->global_work_size[2];
21749  if (temp_sz > 3){
21750  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21751  }
21752  }
21753  }
21754 
21755  cl_event gpuExec;
21756 
21757  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21758 
21759  clWaitForEvents(1, &gpuExec);
21760 
21761  float *result = (float *) malloc(typesz);
21762  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21763 
21764  v->assign(result, result+sz);
21765 
21766  if (typesz2 != typesz or sz != sz2){
21767  float *result2;
21768  result2 = (float *) malloc(typesz2);
21769  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21770 
21771  v2->assign(result2, result2+sz2);
21772  free(result2);
21773  }
21774  else{
21775  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21776 
21777  v2->assign(result, result+sz2);
21778  }
21779 
21780  clReleaseCommandQueue (queue);
21781  clReleaseMemObject(buffer);
21782  clReleaseMemObject(buffer2);
21783  clReleaseMemObject(buffer3);
21784  clReleaseEvent(gpuExec);
21785  free(result);
21786  }
21787 
21788  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
21789  size_t sz = v->size();
21790  size_t sz2 = v2->size();
21791  size_t sz3 = v3->size();
21792  size_t typesz = sizeof(float) * sz;
21793  size_t typesz2 = sizeof(float) * sz2;
21794  size_t typesz3 = sizeof(char) * sz3;
21795  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21796  if (temp_sz > 0){
21797  if (temp_sz > 2){
21798  if (temp_sz > 3){
21799  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21800  }
21801  typesz = sizeof(float) * params->buffers_size[0];
21802  typesz2 = sizeof(float) * params->buffers_size[1];
21803  typesz3 = sizeof(char) * params->buffers_size[2];
21804  }
21805  else{
21806  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21807  }
21808  }
21809  cl_int error = 0;
21810  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21811  checkError(error);
21812  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21813  checkError(error);
21814  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
21815  checkError(error);
21816  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21817  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21818  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21819  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21820 
21821  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21822  checkError (error);
21823  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21824  checkError (error);
21825  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
21826  checkError (error);
21827 
21828  size_t size[3] = {sz, sz2, sz3};
21829  size_t work_dimension = 3;
21830 
21831  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21832  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21833  work_dimension = 1;
21834  }
21835  else if(temp_sz > 0){
21836  if (params->multi_dimensional){
21837  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21838  For default multidimensional global work size, leave the global_work_size vector empty, \
21839  and set multi_dimensional to true. Setting the global work size based on the values inside \
21840  the global_work_size vector.");
21841  }
21842  if (temp_sz == 1){
21843  size[0] = params->global_work_size[0];
21844  work_dimension = 1;
21845  }
21846  else if (temp_sz == 2){
21847  size[0] = params->global_work_size[0];
21848  size[1] = params->global_work_size[1];
21849  work_dimension = 2;
21850  }
21851  else{
21852  size[0] = params->global_work_size[0];
21853  size[1] = params->global_work_size[1];
21854  size[2] = params->global_work_size[2];
21855  if (temp_sz > 3){
21856  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21857  }
21858  }
21859  }
21860 
21861  cl_event gpuExec;
21862 
21863  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21864 
21865  clWaitForEvents(1, &gpuExec);
21866 
21867  float *result = (float *) malloc(typesz);
21868  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21869 
21870  v->assign(result, result+sz);
21871 
21872  if (typesz2 != typesz or sz != sz2){
21873  float *result2;
21874  result2 = (float *) malloc(typesz2);
21875  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21876 
21877  v2->assign(result2, result2+sz2);
21878  free(result2);
21879  }
21880  else{
21881  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21882 
21883  v2->assign(result, result+sz2);
21884  }
21885 
21886  if (typesz3 != typesz or sz != sz3){
21887  char *result3;
21888  result3 = (char *) malloc(typesz3);
21889  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21890 
21891  v3->assign(result3, result3+sz3);
21892  free(result3);
21893  }
21894  else{
21895  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21896 
21897  v3->assign(result, result+sz3);
21898  }
21899 
21900  clReleaseCommandQueue (queue);
21901  clReleaseMemObject(buffer);
21902  clReleaseMemObject(buffer2);
21903  clReleaseMemObject(buffer3);
21904  clReleaseEvent(gpuExec);
21905  free(result);
21906  }
21907 
21908 
21909  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
21910  size_t sz = v.size();
21911  size_t sz2 = v2.size();
21912  size_t sz3 = v3.size();
21913  size_t typesz = sizeof(float) * sz;
21914  size_t typesz2 = sizeof(float) * sz2;
21915  size_t typesz3 = sizeof(int) * sz3;
21916  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
21917  if (temp_sz > 0){
21918  if (temp_sz > 2){
21919  if (temp_sz > 3){
21920  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21921  }
21922  typesz = sizeof(float) * params->buffers_size[0];
21923  typesz2 = sizeof(float) * params->buffers_size[1];
21924  typesz3 = sizeof(int) * params->buffers_size[2];
21925  }
21926  else{
21927  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
21928  }
21929  }
21930  cl_int error = 0;
21931  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21932  checkError(error);
21933  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21934  checkError(error);
21935  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
21936  checkError(error);
21937  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
21938  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
21939  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
21940  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
21941 
21942  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21943  checkError (error);
21944  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21945  checkError (error);
21946  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
21947  checkError (error);
21948 
21949  size_t size[3] = {sz, sz2, sz3};
21950  size_t work_dimension = 3;
21951 
21952  temp_sz = params != NULL ? params->global_work_size.size() : 0;
21953  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
21954  work_dimension = 1;
21955  }
21956  else if(temp_sz > 0){
21957  if (params->multi_dimensional){
21958  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
21959  For default multidimensional global work size, leave the global_work_size vector empty, \
21960  and set multi_dimensional to true. Setting the global work size based on the values inside \
21961  the global_work_size vector.");
21962  }
21963  if (temp_sz == 1){
21964  size[0] = params->global_work_size[0];
21965  work_dimension = 1;
21966  }
21967  else if (temp_sz == 2){
21968  size[0] = params->global_work_size[0];
21969  size[1] = params->global_work_size[1];
21970  work_dimension = 2;
21971  }
21972  else{
21973  size[0] = params->global_work_size[0];
21974  size[1] = params->global_work_size[1];
21975  size[2] = params->global_work_size[2];
21976  if (temp_sz > 3){
21977  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
21978  }
21979  }
21980  }
21981 
21982  cl_event gpuExec;
21983 
21984  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21985 
21986  clWaitForEvents(1, &gpuExec);
21987 
21988  float *result = (float *) malloc(typesz);
21989  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21990 
21991  std::vector<float> res = std::vector<float>();
21992  res.assign(result, result+sz);
21993 
21994  clReleaseCommandQueue (queue);
21995  clReleaseMemObject(buffer);
21996  clReleaseMemObject(buffer2);
21997  clReleaseMemObject(buffer3);
21998  clReleaseEvent(gpuExec);
21999  free(result);
22000 
22001  return res;
22002  }
22003 
22004  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
22005  size_t sz = v->size();
22006  size_t sz2 = v2.size();
22007  size_t sz3 = v3.size();
22008  size_t typesz = sizeof(float) * sz;
22009  size_t typesz2 = sizeof(float) * sz2;
22010  size_t typesz3 = sizeof(int) * sz3;
22011  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22012  if (temp_sz > 0){
22013  if (temp_sz > 2){
22014  if (temp_sz > 3){
22015  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22016  }
22017  typesz = sizeof(float) * params->buffers_size[0];
22018  typesz2 = sizeof(float) * params->buffers_size[1];
22019  typesz3 = sizeof(int) * params->buffers_size[2];
22020  }
22021  else{
22022  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22023  }
22024  }
22025  cl_int error = 0;
22026  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22027  checkError(error);
22028  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22029  checkError(error);
22030  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22031  checkError(error);
22032  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22033  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22034  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22035  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22036 
22037  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22038  checkError (error);
22039  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22040  checkError (error);
22041  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22042  checkError (error);
22043 
22044  size_t size[3] = {sz, sz2, sz3};
22045  size_t work_dimension = 3;
22046 
22047  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22048  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22049  work_dimension = 1;
22050  }
22051  else if(temp_sz > 0){
22052  if (params->multi_dimensional){
22053  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22054  For default multidimensional global work size, leave the global_work_size vector empty, \
22055  and set multi_dimensional to true. Setting the global work size based on the values inside \
22056  the global_work_size vector.");
22057  }
22058  if (temp_sz == 1){
22059  size[0] = params->global_work_size[0];
22060  work_dimension = 1;
22061  }
22062  else if (temp_sz == 2){
22063  size[0] = params->global_work_size[0];
22064  size[1] = params->global_work_size[1];
22065  work_dimension = 2;
22066  }
22067  else{
22068  size[0] = params->global_work_size[0];
22069  size[1] = params->global_work_size[1];
22070  size[2] = params->global_work_size[2];
22071  if (temp_sz > 3){
22072  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22073  }
22074  }
22075  }
22076 
22077  cl_event gpuExec;
22078 
22079  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22080 
22081  clWaitForEvents(1, &gpuExec);
22082 
22083  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22084 
22085  clWaitForEvents(1, &gpuExec);
22086 
22087  float *result = (float *) malloc(typesz);
22088  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22089 
22090  v->assign(result, result+sz);
22091 
22092  clReleaseCommandQueue (queue);
22093  clReleaseMemObject(buffer);
22094  clReleaseMemObject(buffer2);
22095  clReleaseMemObject(buffer3);
22096  clReleaseEvent(gpuExec);
22097  free(result);
22098  }
22099 
22100  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
22101  size_t sz = v->size();
22102  size_t sz2 = v2->size();
22103  size_t sz3 = v3.size();
22104  size_t typesz = sizeof(float) * sz;
22105  size_t typesz2 = sizeof(float) * sz2;
22106  size_t typesz3 = sizeof(int) * sz3;
22107  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22108  if (temp_sz > 0){
22109  if (temp_sz > 2){
22110  if (temp_sz > 3){
22111  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22112  }
22113  typesz = sizeof(float) * params->buffers_size[0];
22114  typesz2 = sizeof(float) * params->buffers_size[1];
22115  typesz3 = sizeof(int) * params->buffers_size[2];
22116  }
22117  else{
22118  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22119  }
22120  }
22121  cl_int error = 0;
22122  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22123  checkError(error);
22124  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22125  checkError(error);
22126  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22127  checkError(error);
22128  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22129  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22130  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22131  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22132 
22133  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22134  checkError (error);
22135  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22136  checkError (error);
22137  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22138  checkError (error);
22139 
22140  size_t size[3] = {sz, sz2, sz3};
22141  size_t work_dimension = 3;
22142 
22143  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22144  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22145  work_dimension = 1;
22146  }
22147  else if(temp_sz > 0){
22148  if (params->multi_dimensional){
22149  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22150  For default multidimensional global work size, leave the global_work_size vector empty, \
22151  and set multi_dimensional to true. Setting the global work size based on the values inside \
22152  the global_work_size vector.");
22153  }
22154  if (temp_sz == 1){
22155  size[0] = params->global_work_size[0];
22156  work_dimension = 1;
22157  }
22158  else if (temp_sz == 2){
22159  size[0] = params->global_work_size[0];
22160  size[1] = params->global_work_size[1];
22161  work_dimension = 2;
22162  }
22163  else{
22164  size[0] = params->global_work_size[0];
22165  size[1] = params->global_work_size[1];
22166  size[2] = params->global_work_size[2];
22167  if (temp_sz > 3){
22168  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22169  }
22170  }
22171  }
22172 
22173  cl_event gpuExec;
22174 
22175  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22176 
22177  clWaitForEvents(1, &gpuExec);
22178 
22179  float *result = (float *) malloc(typesz);
22180  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22181 
22182  v->assign(result, result+sz);
22183 
22184  if (typesz2 != typesz or sz != sz2){
22185  float *result2;
22186  result2 = (float *) malloc(typesz2);
22187  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22188 
22189  v2->assign(result2, result2+sz2);
22190  free(result2);
22191  }
22192  else{
22193  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22194 
22195  v2->assign(result, result+sz2);
22196  }
22197 
22198  clReleaseCommandQueue (queue);
22199  clReleaseMemObject(buffer);
22200  clReleaseMemObject(buffer2);
22201  clReleaseMemObject(buffer3);
22202  clReleaseEvent(gpuExec);
22203  free(result);
22204  }
22205 
22206  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
22207  size_t sz = v->size();
22208  size_t sz2 = v2->size();
22209  size_t sz3 = v3->size();
22210  size_t typesz = sizeof(float) * sz;
22211  size_t typesz2 = sizeof(float) * sz2;
22212  size_t typesz3 = sizeof(int) * sz3;
22213  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22214  if (temp_sz > 0){
22215  if (temp_sz > 2){
22216  if (temp_sz > 3){
22217  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22218  }
22219  typesz = sizeof(float) * params->buffers_size[0];
22220  typesz2 = sizeof(float) * params->buffers_size[1];
22221  typesz3 = sizeof(int) * params->buffers_size[2];
22222  }
22223  else{
22224  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22225  }
22226  }
22227  cl_int error = 0;
22228  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22229  checkError(error);
22230  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22231  checkError(error);
22232  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
22233  checkError(error);
22234  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22235  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22236  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22237  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22238 
22239  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22240  checkError (error);
22241  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22242  checkError (error);
22243  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
22244  checkError (error);
22245 
22246  size_t size[3] = {sz, sz2, sz3};
22247  size_t work_dimension = 3;
22248 
22249  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22250  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22251  work_dimension = 1;
22252  }
22253  else if(temp_sz > 0){
22254  if (params->multi_dimensional){
22255  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22256  For default multidimensional global work size, leave the global_work_size vector empty, \
22257  and set multi_dimensional to true. Setting the global work size based on the values inside \
22258  the global_work_size vector.");
22259  }
22260  if (temp_sz == 1){
22261  size[0] = params->global_work_size[0];
22262  work_dimension = 1;
22263  }
22264  else if (temp_sz == 2){
22265  size[0] = params->global_work_size[0];
22266  size[1] = params->global_work_size[1];
22267  work_dimension = 2;
22268  }
22269  else{
22270  size[0] = params->global_work_size[0];
22271  size[1] = params->global_work_size[1];
22272  size[2] = params->global_work_size[2];
22273  if (temp_sz > 3){
22274  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22275  }
22276  }
22277  }
22278 
22279  cl_event gpuExec;
22280 
22281  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22282 
22283  clWaitForEvents(1, &gpuExec);
22284 
22285  float *result = (float *) malloc(typesz);
22286  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22287 
22288  v->assign(result, result+sz);
22289 
22290  if (typesz2 != typesz or sz != sz2){
22291  float *result2;
22292  result2 = (float *) malloc(typesz2);
22293  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22294 
22295  v2->assign(result2, result2+sz2);
22296  free(result2);
22297  }
22298  else{
22299  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22300 
22301  v2->assign(result, result+sz2);
22302  }
22303 
22304  if (typesz3 != typesz or sz != sz3){
22305  int *result3;
22306  result3 = (int *) malloc(typesz3);
22307  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
22308 
22309  v3->assign(result3, result3+sz3);
22310  free(result3);
22311  }
22312  else{
22313  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22314 
22315  v3->assign(result, result+sz3);
22316  }
22317 
22318  clReleaseCommandQueue (queue);
22319  clReleaseMemObject(buffer);
22320  clReleaseMemObject(buffer2);
22321  clReleaseMemObject(buffer3);
22322  clReleaseEvent(gpuExec);
22323  free(result);
22324  }
22325 
22326 
22327  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
22328  size_t sz = v.size();
22329  size_t sz2 = v2.size();
22330  size_t sz3 = v3.size();
22331  size_t typesz = sizeof(float) * sz;
22332  size_t typesz2 = sizeof(float) * sz2;
22333  size_t typesz3 = sizeof(float) * sz3;
22334  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22335  if (temp_sz > 0){
22336  if (temp_sz > 2){
22337  if (temp_sz > 3){
22338  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22339  }
22340  typesz = sizeof(float) * params->buffers_size[0];
22341  typesz2 = sizeof(float) * params->buffers_size[1];
22342  typesz3 = sizeof(float) * params->buffers_size[2];
22343  }
22344  else{
22345  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22346  }
22347  }
22348  cl_int error = 0;
22349  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22350  checkError(error);
22351  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22352  checkError(error);
22353  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22354  checkError(error);
22355  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22356  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22357  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22358  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22359 
22360  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
22361  checkError (error);
22362  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22363  checkError (error);
22364  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22365  checkError (error);
22366 
22367  size_t size[3] = {sz, sz2, sz3};
22368  size_t work_dimension = 3;
22369 
22370  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22371  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22372  work_dimension = 1;
22373  }
22374  else if(temp_sz > 0){
22375  if (params->multi_dimensional){
22376  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22377  For default multidimensional global work size, leave the global_work_size vector empty, \
22378  and set multi_dimensional to true. Setting the global work size based on the values inside \
22379  the global_work_size vector.");
22380  }
22381  if (temp_sz == 1){
22382  size[0] = params->global_work_size[0];
22383  work_dimension = 1;
22384  }
22385  else if (temp_sz == 2){
22386  size[0] = params->global_work_size[0];
22387  size[1] = params->global_work_size[1];
22388  work_dimension = 2;
22389  }
22390  else{
22391  size[0] = params->global_work_size[0];
22392  size[1] = params->global_work_size[1];
22393  size[2] = params->global_work_size[2];
22394  if (temp_sz > 3){
22395  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22396  }
22397  }
22398  }
22399 
22400  cl_event gpuExec;
22401 
22402  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22403 
22404  clWaitForEvents(1, &gpuExec);
22405 
22406  float *result = (float *) malloc(typesz);
22407  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22408 
22409  std::vector<float> res = std::vector<float>();
22410  res.assign(result, result+sz);
22411 
22412  clReleaseCommandQueue (queue);
22413  clReleaseMemObject(buffer);
22414  clReleaseMemObject(buffer2);
22415  clReleaseMemObject(buffer3);
22416  clReleaseEvent(gpuExec);
22417  free(result);
22418 
22419  return res;
22420  }
22421 
22422  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
22423  size_t sz = v->size();
22424  size_t sz2 = v2.size();
22425  size_t sz3 = v3.size();
22426  size_t typesz = sizeof(float) * sz;
22427  size_t typesz2 = sizeof(float) * sz2;
22428  size_t typesz3 = sizeof(float) * sz3;
22429  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22430  if (temp_sz > 0){
22431  if (temp_sz > 2){
22432  if (temp_sz > 3){
22433  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22434  }
22435  typesz = sizeof(float) * params->buffers_size[0];
22436  typesz2 = sizeof(float) * params->buffers_size[1];
22437  typesz3 = sizeof(float) * params->buffers_size[2];
22438  }
22439  else{
22440  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22441  }
22442  }
22443  cl_int error = 0;
22444  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22445  checkError(error);
22446  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22447  checkError(error);
22448  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22449  checkError(error);
22450  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22451  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22452  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22453  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22454 
22455  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22456  checkError (error);
22457  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22458  checkError (error);
22459  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22460  checkError (error);
22461 
22462  size_t size[3] = {sz, sz2, sz3};
22463  size_t work_dimension = 3;
22464 
22465  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22466  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22467  work_dimension = 1;
22468  }
22469  else if(temp_sz > 0){
22470  if (params->multi_dimensional){
22471  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22472  For default multidimensional global work size, leave the global_work_size vector empty, \
22473  and set multi_dimensional to true. Setting the global work size based on the values inside \
22474  the global_work_size vector.");
22475  }
22476  if (temp_sz == 1){
22477  size[0] = params->global_work_size[0];
22478  work_dimension = 1;
22479  }
22480  else if (temp_sz == 2){
22481  size[0] = params->global_work_size[0];
22482  size[1] = params->global_work_size[1];
22483  work_dimension = 2;
22484  }
22485  else{
22486  size[0] = params->global_work_size[0];
22487  size[1] = params->global_work_size[1];
22488  size[2] = params->global_work_size[2];
22489  if (temp_sz > 3){
22490  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22491  }
22492  }
22493  }
22494 
22495  cl_event gpuExec;
22496 
22497  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22498 
22499  clWaitForEvents(1, &gpuExec);
22500 
22501  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22502 
22503  clWaitForEvents(1, &gpuExec);
22504 
22505  float *result = (float *) malloc(typesz);
22506  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22507 
22508  v->assign(result, result+sz);
22509 
22510  clReleaseCommandQueue (queue);
22511  clReleaseMemObject(buffer);
22512  clReleaseMemObject(buffer2);
22513  clReleaseMemObject(buffer3);
22514  clReleaseEvent(gpuExec);
22515  free(result);
22516  }
22517 
22518  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
22519  size_t sz = v->size();
22520  size_t sz2 = v2->size();
22521  size_t sz3 = v3.size();
22522  size_t typesz = sizeof(float) * sz;
22523  size_t typesz2 = sizeof(float) * sz2;
22524  size_t typesz3 = sizeof(float) * sz3;
22525  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22526  if (temp_sz > 0){
22527  if (temp_sz > 2){
22528  if (temp_sz > 3){
22529  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22530  }
22531  typesz = sizeof(float) * params->buffers_size[0];
22532  typesz2 = sizeof(float) * params->buffers_size[1];
22533  typesz3 = sizeof(float) * params->buffers_size[2];
22534  }
22535  else{
22536  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22537  }
22538  }
22539  cl_int error = 0;
22540  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22541  checkError(error);
22542  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22543  checkError(error);
22544  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22545  checkError(error);
22546  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22547  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22548  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22549  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22550 
22551  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22552  checkError (error);
22553  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22554  checkError (error);
22555  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22556  checkError (error);
22557 
22558  size_t size[3] = {sz, sz2, sz3};
22559  size_t work_dimension = 3;
22560 
22561  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22562  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22563  work_dimension = 1;
22564  }
22565  else if(temp_sz > 0){
22566  if (params->multi_dimensional){
22567  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22568  For default multidimensional global work size, leave the global_work_size vector empty, \
22569  and set multi_dimensional to true. Setting the global work size based on the values inside \
22570  the global_work_size vector.");
22571  }
22572  if (temp_sz == 1){
22573  size[0] = params->global_work_size[0];
22574  work_dimension = 1;
22575  }
22576  else if (temp_sz == 2){
22577  size[0] = params->global_work_size[0];
22578  size[1] = params->global_work_size[1];
22579  work_dimension = 2;
22580  }
22581  else{
22582  size[0] = params->global_work_size[0];
22583  size[1] = params->global_work_size[1];
22584  size[2] = params->global_work_size[2];
22585  if (temp_sz > 3){
22586  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22587  }
22588  }
22589  }
22590 
22591  cl_event gpuExec;
22592 
22593  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22594 
22595  clWaitForEvents(1, &gpuExec);
22596 
22597  float *result = (float *) malloc(typesz);
22598  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22599 
22600  v->assign(result, result+sz);
22601 
22602  if (typesz2 != typesz or sz != sz2){
22603  float *result2;
22604  result2 = (float *) malloc(typesz2);
22605  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22606 
22607  v2->assign(result2, result2+sz2);
22608  free(result2);
22609  }
22610  else{
22611  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22612 
22613  v2->assign(result, result+sz2);
22614  }
22615 
22616  clReleaseCommandQueue (queue);
22617  clReleaseMemObject(buffer);
22618  clReleaseMemObject(buffer2);
22619  clReleaseMemObject(buffer3);
22620  clReleaseEvent(gpuExec);
22621  free(result);
22622  }
22623 
22624  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
22625  size_t sz = v->size();
22626  size_t sz2 = v2->size();
22627  size_t sz3 = v3->size();
22628  size_t typesz = sizeof(float) * sz;
22629  size_t typesz2 = sizeof(float) * sz2;
22630  size_t typesz3 = sizeof(float) * sz3;
22631  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22632  if (temp_sz > 0){
22633  if (temp_sz > 2){
22634  if (temp_sz > 3){
22635  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22636  }
22637  typesz = sizeof(float) * params->buffers_size[0];
22638  typesz2 = sizeof(float) * params->buffers_size[1];
22639  typesz3 = sizeof(float) * params->buffers_size[2];
22640  }
22641  else{
22642  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22643  }
22644  }
22645  cl_int error = 0;
22646  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22647  checkError(error);
22648  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22649  checkError(error);
22650  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
22651  checkError(error);
22652  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22653  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22654  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22655  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22656 
22657  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22658  checkError (error);
22659  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22660  checkError (error);
22661  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
22662  checkError (error);
22663 
22664  size_t size[3] = {sz, sz2, sz3};
22665  size_t work_dimension = 3;
22666 
22667  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22668  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22669  work_dimension = 1;
22670  }
22671  else if(temp_sz > 0){
22672  if (params->multi_dimensional){
22673  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22674  For default multidimensional global work size, leave the global_work_size vector empty, \
22675  and set multi_dimensional to true. Setting the global work size based on the values inside \
22676  the global_work_size vector.");
22677  }
22678  if (temp_sz == 1){
22679  size[0] = params->global_work_size[0];
22680  work_dimension = 1;
22681  }
22682  else if (temp_sz == 2){
22683  size[0] = params->global_work_size[0];
22684  size[1] = params->global_work_size[1];
22685  work_dimension = 2;
22686  }
22687  else{
22688  size[0] = params->global_work_size[0];
22689  size[1] = params->global_work_size[1];
22690  size[2] = params->global_work_size[2];
22691  if (temp_sz > 3){
22692  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22693  }
22694  }
22695  }
22696 
22697  cl_event gpuExec;
22698 
22699  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22700 
22701  clWaitForEvents(1, &gpuExec);
22702 
22703  float *result = (float *) malloc(typesz);
22704  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22705 
22706  v->assign(result, result+sz);
22707 
22708  if (typesz2 != typesz or sz != sz2){
22709  float *result2;
22710  result2 = (float *) malloc(typesz2);
22711  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22712 
22713  v2->assign(result2, result2+sz2);
22714  free(result2);
22715  }
22716  else{
22717  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22718 
22719  v2->assign(result, result+sz2);
22720  }
22721 
22722  if (typesz3 != typesz or sz != sz3){
22723  float *result3;
22724  result3 = (float *) malloc(typesz3);
22725  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
22726 
22727  v3->assign(result3, result3+sz3);
22728  free(result3);
22729  }
22730  else{
22731  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22732 
22733  v3->assign(result, result+sz3);
22734  }
22735 
22736  clReleaseCommandQueue (queue);
22737  clReleaseMemObject(buffer);
22738  clReleaseMemObject(buffer2);
22739  clReleaseMemObject(buffer3);
22740  clReleaseEvent(gpuExec);
22741  free(result);
22742  }
22743 
22744 
22745  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
22746  size_t sz = v.size();
22747  size_t sz2 = v2.size();
22748  size_t sz3 = v3.size();
22749  size_t typesz = sizeof(float) * sz;
22750  size_t typesz2 = sizeof(float) * sz2;
22751  size_t typesz3 = sizeof(double) * sz3;
22752  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22753  if (temp_sz > 0){
22754  if (temp_sz > 2){
22755  if (temp_sz > 3){
22756  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22757  }
22758  typesz = sizeof(float) * params->buffers_size[0];
22759  typesz2 = sizeof(float) * params->buffers_size[1];
22760  typesz3 = sizeof(double) * params->buffers_size[2];
22761  }
22762  else{
22763  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22764  }
22765  }
22766  cl_int error = 0;
22767  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22768  checkError(error);
22769  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22770  checkError(error);
22771  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22772  checkError(error);
22773  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22774  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22775  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22776  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22777 
22778  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
22779  checkError (error);
22780  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22781  checkError (error);
22782  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22783  checkError (error);
22784 
22785  size_t size[3] = {sz, sz2, sz3};
22786  size_t work_dimension = 3;
22787 
22788  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22789  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22790  work_dimension = 1;
22791  }
22792  else if(temp_sz > 0){
22793  if (params->multi_dimensional){
22794  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22795  For default multidimensional global work size, leave the global_work_size vector empty, \
22796  and set multi_dimensional to true. Setting the global work size based on the values inside \
22797  the global_work_size vector.");
22798  }
22799  if (temp_sz == 1){
22800  size[0] = params->global_work_size[0];
22801  work_dimension = 1;
22802  }
22803  else if (temp_sz == 2){
22804  size[0] = params->global_work_size[0];
22805  size[1] = params->global_work_size[1];
22806  work_dimension = 2;
22807  }
22808  else{
22809  size[0] = params->global_work_size[0];
22810  size[1] = params->global_work_size[1];
22811  size[2] = params->global_work_size[2];
22812  if (temp_sz > 3){
22813  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22814  }
22815  }
22816  }
22817 
22818  cl_event gpuExec;
22819 
22820  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22821 
22822  clWaitForEvents(1, &gpuExec);
22823 
22824  float *result = (float *) malloc(typesz);
22825  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22826 
22827  std::vector<float> res = std::vector<float>();
22828  res.assign(result, result+sz);
22829 
22830  clReleaseCommandQueue (queue);
22831  clReleaseMemObject(buffer);
22832  clReleaseMemObject(buffer2);
22833  clReleaseMemObject(buffer3);
22834  clReleaseEvent(gpuExec);
22835  free(result);
22836 
22837  return res;
22838  }
22839 
22840  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
22841  size_t sz = v->size();
22842  size_t sz2 = v2.size();
22843  size_t sz3 = v3.size();
22844  size_t typesz = sizeof(float) * sz;
22845  size_t typesz2 = sizeof(float) * sz2;
22846  size_t typesz3 = sizeof(double) * sz3;
22847  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22848  if (temp_sz > 0){
22849  if (temp_sz > 2){
22850  if (temp_sz > 3){
22851  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22852  }
22853  typesz = sizeof(float) * params->buffers_size[0];
22854  typesz2 = sizeof(float) * params->buffers_size[1];
22855  typesz3 = sizeof(double) * params->buffers_size[2];
22856  }
22857  else{
22858  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22859  }
22860  }
22861  cl_int error = 0;
22862  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22863  checkError(error);
22864  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22865  checkError(error);
22866  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22867  checkError(error);
22868  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22869  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22870  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22871  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22872 
22873  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22874  checkError (error);
22875  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22876  checkError (error);
22877  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22878  checkError (error);
22879 
22880  size_t size[3] = {sz, sz2, sz3};
22881  size_t work_dimension = 3;
22882 
22883  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22884  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22885  work_dimension = 1;
22886  }
22887  else if(temp_sz > 0){
22888  if (params->multi_dimensional){
22889  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22890  For default multidimensional global work size, leave the global_work_size vector empty, \
22891  and set multi_dimensional to true. Setting the global work size based on the values inside \
22892  the global_work_size vector.");
22893  }
22894  if (temp_sz == 1){
22895  size[0] = params->global_work_size[0];
22896  work_dimension = 1;
22897  }
22898  else if (temp_sz == 2){
22899  size[0] = params->global_work_size[0];
22900  size[1] = params->global_work_size[1];
22901  work_dimension = 2;
22902  }
22903  else{
22904  size[0] = params->global_work_size[0];
22905  size[1] = params->global_work_size[1];
22906  size[2] = params->global_work_size[2];
22907  if (temp_sz > 3){
22908  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22909  }
22910  }
22911  }
22912 
22913  cl_event gpuExec;
22914 
22915  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22916 
22917  clWaitForEvents(1, &gpuExec);
22918 
22919  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22920 
22921  clWaitForEvents(1, &gpuExec);
22922 
22923  float *result = (float *) malloc(typesz);
22924  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22925 
22926  v->assign(result, result+sz);
22927 
22928  clReleaseCommandQueue (queue);
22929  clReleaseMemObject(buffer);
22930  clReleaseMemObject(buffer2);
22931  clReleaseMemObject(buffer3);
22932  clReleaseEvent(gpuExec);
22933  free(result);
22934  }
22935 
22936  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
22937  size_t sz = v->size();
22938  size_t sz2 = v2->size();
22939  size_t sz3 = v3.size();
22940  size_t typesz = sizeof(float) * sz;
22941  size_t typesz2 = sizeof(float) * sz2;
22942  size_t typesz3 = sizeof(double) * sz3;
22943  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
22944  if (temp_sz > 0){
22945  if (temp_sz > 2){
22946  if (temp_sz > 3){
22947  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22948  }
22949  typesz = sizeof(float) * params->buffers_size[0];
22950  typesz2 = sizeof(float) * params->buffers_size[1];
22951  typesz3 = sizeof(double) * params->buffers_size[2];
22952  }
22953  else{
22954  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22955  }
22956  }
22957  cl_int error = 0;
22958  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22959  checkError(error);
22960  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22961  checkError(error);
22962  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22963  checkError(error);
22964  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
22965  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
22966  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
22967  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
22968 
22969  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22970  checkError (error);
22971  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22972  checkError (error);
22973  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22974  checkError (error);
22975 
22976  size_t size[3] = {sz, sz2, sz3};
22977  size_t work_dimension = 3;
22978 
22979  temp_sz = params != NULL ? params->global_work_size.size() : 0;
22980  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
22981  work_dimension = 1;
22982  }
22983  else if(temp_sz > 0){
22984  if (params->multi_dimensional){
22985  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
22986  For default multidimensional global work size, leave the global_work_size vector empty, \
22987  and set multi_dimensional to true. Setting the global work size based on the values inside \
22988  the global_work_size vector.");
22989  }
22990  if (temp_sz == 1){
22991  size[0] = params->global_work_size[0];
22992  work_dimension = 1;
22993  }
22994  else if (temp_sz == 2){
22995  size[0] = params->global_work_size[0];
22996  size[1] = params->global_work_size[1];
22997  work_dimension = 2;
22998  }
22999  else{
23000  size[0] = params->global_work_size[0];
23001  size[1] = params->global_work_size[1];
23002  size[2] = params->global_work_size[2];
23003  if (temp_sz > 3){
23004  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23005  }
23006  }
23007  }
23008 
23009  cl_event gpuExec;
23010 
23011  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23012 
23013  clWaitForEvents(1, &gpuExec);
23014 
23015  float *result = (float *) malloc(typesz);
23016  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23017 
23018  v->assign(result, result+sz);
23019 
23020  if (typesz2 != typesz or sz != sz2){
23021  float *result2;
23022  result2 = (float *) malloc(typesz2);
23023  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23024 
23025  v2->assign(result2, result2+sz2);
23026  free(result2);
23027  }
23028  else{
23029  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23030 
23031  v2->assign(result, result+sz2);
23032  }
23033 
23034  clReleaseCommandQueue (queue);
23035  clReleaseMemObject(buffer);
23036  clReleaseMemObject(buffer2);
23037  clReleaseMemObject(buffer3);
23038  clReleaseEvent(gpuExec);
23039  free(result);
23040  }
23041 
23042  void ROS_OpenCL::process(std::vector<float>* v, std::vector<float>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
23043  size_t sz = v->size();
23044  size_t sz2 = v2->size();
23045  size_t sz3 = v3->size();
23046  size_t typesz = sizeof(float) * sz;
23047  size_t typesz2 = sizeof(float) * sz2;
23048  size_t typesz3 = sizeof(double) * sz3;
23049  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23050  if (temp_sz > 0){
23051  if (temp_sz > 2){
23052  if (temp_sz > 3){
23053  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23054  }
23055  typesz = sizeof(float) * params->buffers_size[0];
23056  typesz2 = sizeof(float) * params->buffers_size[1];
23057  typesz3 = sizeof(double) * params->buffers_size[2];
23058  }
23059  else{
23060  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23061  }
23062  }
23063  cl_int error = 0;
23064  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23065  checkError(error);
23066  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23067  checkError(error);
23068  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23069  checkError(error);
23070  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23071  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23072  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23073  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23074 
23075  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23076  checkError (error);
23077  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23078  checkError (error);
23079  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23080  checkError (error);
23081 
23082  size_t size[3] = {sz, sz2, sz3};
23083  size_t work_dimension = 3;
23084 
23085  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23086  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23087  work_dimension = 1;
23088  }
23089  else if(temp_sz > 0){
23090  if (params->multi_dimensional){
23091  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23092  For default multidimensional global work size, leave the global_work_size vector empty, \
23093  and set multi_dimensional to true. Setting the global work size based on the values inside \
23094  the global_work_size vector.");
23095  }
23096  if (temp_sz == 1){
23097  size[0] = params->global_work_size[0];
23098  work_dimension = 1;
23099  }
23100  else if (temp_sz == 2){
23101  size[0] = params->global_work_size[0];
23102  size[1] = params->global_work_size[1];
23103  work_dimension = 2;
23104  }
23105  else{
23106  size[0] = params->global_work_size[0];
23107  size[1] = params->global_work_size[1];
23108  size[2] = params->global_work_size[2];
23109  if (temp_sz > 3){
23110  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23111  }
23112  }
23113  }
23114 
23115  cl_event gpuExec;
23116 
23117  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23118 
23119  clWaitForEvents(1, &gpuExec);
23120 
23121  float *result = (float *) malloc(typesz);
23122  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23123 
23124  v->assign(result, result+sz);
23125 
23126  if (typesz2 != typesz or sz != sz2){
23127  float *result2;
23128  result2 = (float *) malloc(typesz2);
23129  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23130 
23131  v2->assign(result2, result2+sz2);
23132  free(result2);
23133  }
23134  else{
23135  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23136 
23137  v2->assign(result, result+sz2);
23138  }
23139 
23140  if (typesz3 != typesz or sz != sz3){
23141  double *result3;
23142  result3 = (double *) malloc(typesz3);
23143  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23144 
23145  v3->assign(result3, result3+sz3);
23146  free(result3);
23147  }
23148  else{
23149  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23150 
23151  v3->assign(result, result+sz3);
23152  }
23153 
23154  clReleaseCommandQueue (queue);
23155  clReleaseMemObject(buffer);
23156  clReleaseMemObject(buffer2);
23157  clReleaseMemObject(buffer3);
23158  clReleaseEvent(gpuExec);
23159  free(result);
23160  }
23161 
23162 
23163  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
23164  size_t sz = v.size();
23165  size_t sz2 = v2.size();
23166  size_t sz3 = v3.size();
23167  size_t typesz = sizeof(float) * sz;
23168  size_t typesz2 = sizeof(double) * sz2;
23169  size_t typesz3 = sizeof(char) * sz3;
23170  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23171  if (temp_sz > 0){
23172  if (temp_sz > 2){
23173  if (temp_sz > 3){
23174  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23175  }
23176  typesz = sizeof(float) * params->buffers_size[0];
23177  typesz2 = sizeof(double) * params->buffers_size[1];
23178  typesz3 = sizeof(char) * params->buffers_size[2];
23179  }
23180  else{
23181  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23182  }
23183  }
23184  cl_int error = 0;
23185  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23186  checkError(error);
23187  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23188  checkError(error);
23189  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23190  checkError(error);
23191  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23192  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23193  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23194  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23195 
23196  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
23197  checkError (error);
23198  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23199  checkError (error);
23200  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23201  checkError (error);
23202 
23203  size_t size[3] = {sz, sz2, sz3};
23204  size_t work_dimension = 3;
23205 
23206  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23207  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23208  work_dimension = 1;
23209  }
23210  else if(temp_sz > 0){
23211  if (params->multi_dimensional){
23212  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23213  For default multidimensional global work size, leave the global_work_size vector empty, \
23214  and set multi_dimensional to true. Setting the global work size based on the values inside \
23215  the global_work_size vector.");
23216  }
23217  if (temp_sz == 1){
23218  size[0] = params->global_work_size[0];
23219  work_dimension = 1;
23220  }
23221  else if (temp_sz == 2){
23222  size[0] = params->global_work_size[0];
23223  size[1] = params->global_work_size[1];
23224  work_dimension = 2;
23225  }
23226  else{
23227  size[0] = params->global_work_size[0];
23228  size[1] = params->global_work_size[1];
23229  size[2] = params->global_work_size[2];
23230  if (temp_sz > 3){
23231  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23232  }
23233  }
23234  }
23235 
23236  cl_event gpuExec;
23237 
23238  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23239 
23240  clWaitForEvents(1, &gpuExec);
23241 
23242  float *result = (float *) malloc(typesz);
23243  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23244 
23245  std::vector<float> res = std::vector<float>();
23246  res.assign(result, result+sz);
23247 
23248  clReleaseCommandQueue (queue);
23249  clReleaseMemObject(buffer);
23250  clReleaseMemObject(buffer2);
23251  clReleaseMemObject(buffer3);
23252  clReleaseEvent(gpuExec);
23253  free(result);
23254 
23255  return res;
23256  }
23257 
23258  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
23259  size_t sz = v->size();
23260  size_t sz2 = v2.size();
23261  size_t sz3 = v3.size();
23262  size_t typesz = sizeof(float) * sz;
23263  size_t typesz2 = sizeof(double) * sz2;
23264  size_t typesz3 = sizeof(char) * sz3;
23265  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23266  if (temp_sz > 0){
23267  if (temp_sz > 2){
23268  if (temp_sz > 3){
23269  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23270  }
23271  typesz = sizeof(float) * params->buffers_size[0];
23272  typesz2 = sizeof(double) * params->buffers_size[1];
23273  typesz3 = sizeof(char) * params->buffers_size[2];
23274  }
23275  else{
23276  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23277  }
23278  }
23279  cl_int error = 0;
23280  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23281  checkError(error);
23282  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23283  checkError(error);
23284  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23285  checkError(error);
23286  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23287  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23288  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23289  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23290 
23291  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23292  checkError (error);
23293  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23294  checkError (error);
23295  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23296  checkError (error);
23297 
23298  size_t size[3] = {sz, sz2, sz3};
23299  size_t work_dimension = 3;
23300 
23301  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23302  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23303  work_dimension = 1;
23304  }
23305  else if(temp_sz > 0){
23306  if (params->multi_dimensional){
23307  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23308  For default multidimensional global work size, leave the global_work_size vector empty, \
23309  and set multi_dimensional to true. Setting the global work size based on the values inside \
23310  the global_work_size vector.");
23311  }
23312  if (temp_sz == 1){
23313  size[0] = params->global_work_size[0];
23314  work_dimension = 1;
23315  }
23316  else if (temp_sz == 2){
23317  size[0] = params->global_work_size[0];
23318  size[1] = params->global_work_size[1];
23319  work_dimension = 2;
23320  }
23321  else{
23322  size[0] = params->global_work_size[0];
23323  size[1] = params->global_work_size[1];
23324  size[2] = params->global_work_size[2];
23325  if (temp_sz > 3){
23326  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23327  }
23328  }
23329  }
23330 
23331  cl_event gpuExec;
23332 
23333  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23334 
23335  clWaitForEvents(1, &gpuExec);
23336 
23337  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23338 
23339  clWaitForEvents(1, &gpuExec);
23340 
23341  float *result = (float *) malloc(typesz);
23342  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23343 
23344  v->assign(result, result+sz);
23345 
23346  clReleaseCommandQueue (queue);
23347  clReleaseMemObject(buffer);
23348  clReleaseMemObject(buffer2);
23349  clReleaseMemObject(buffer3);
23350  clReleaseEvent(gpuExec);
23351  free(result);
23352  }
23353 
23354  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
23355  size_t sz = v->size();
23356  size_t sz2 = v2->size();
23357  size_t sz3 = v3.size();
23358  size_t typesz = sizeof(float) * sz;
23359  size_t typesz2 = sizeof(double) * sz2;
23360  size_t typesz3 = sizeof(char) * sz3;
23361  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23362  if (temp_sz > 0){
23363  if (temp_sz > 2){
23364  if (temp_sz > 3){
23365  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23366  }
23367  typesz = sizeof(float) * params->buffers_size[0];
23368  typesz2 = sizeof(double) * params->buffers_size[1];
23369  typesz3 = sizeof(char) * params->buffers_size[2];
23370  }
23371  else{
23372  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23373  }
23374  }
23375  cl_int error = 0;
23376  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23377  checkError(error);
23378  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23379  checkError(error);
23380  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23381  checkError(error);
23382  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23383  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23384  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23385  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23386 
23387  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23388  checkError (error);
23389  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23390  checkError (error);
23391  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23392  checkError (error);
23393 
23394  size_t size[3] = {sz, sz2, sz3};
23395  size_t work_dimension = 3;
23396 
23397  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23398  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23399  work_dimension = 1;
23400  }
23401  else if(temp_sz > 0){
23402  if (params->multi_dimensional){
23403  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23404  For default multidimensional global work size, leave the global_work_size vector empty, \
23405  and set multi_dimensional to true. Setting the global work size based on the values inside \
23406  the global_work_size vector.");
23407  }
23408  if (temp_sz == 1){
23409  size[0] = params->global_work_size[0];
23410  work_dimension = 1;
23411  }
23412  else if (temp_sz == 2){
23413  size[0] = params->global_work_size[0];
23414  size[1] = params->global_work_size[1];
23415  work_dimension = 2;
23416  }
23417  else{
23418  size[0] = params->global_work_size[0];
23419  size[1] = params->global_work_size[1];
23420  size[2] = params->global_work_size[2];
23421  if (temp_sz > 3){
23422  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23423  }
23424  }
23425  }
23426 
23427  cl_event gpuExec;
23428 
23429  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23430 
23431  clWaitForEvents(1, &gpuExec);
23432 
23433  float *result = (float *) malloc(typesz);
23434  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23435 
23436  v->assign(result, result+sz);
23437 
23438  if (typesz2 != typesz or sz != sz2){
23439  double *result2;
23440  result2 = (double *) malloc(typesz2);
23441  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23442 
23443  v2->assign(result2, result2+sz2);
23444  free(result2);
23445  }
23446  else{
23447  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23448 
23449  v2->assign(result, result+sz2);
23450  }
23451 
23452  clReleaseCommandQueue (queue);
23453  clReleaseMemObject(buffer);
23454  clReleaseMemObject(buffer2);
23455  clReleaseMemObject(buffer3);
23456  clReleaseEvent(gpuExec);
23457  free(result);
23458  }
23459 
23460  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
23461  size_t sz = v->size();
23462  size_t sz2 = v2->size();
23463  size_t sz3 = v3->size();
23464  size_t typesz = sizeof(float) * sz;
23465  size_t typesz2 = sizeof(double) * sz2;
23466  size_t typesz3 = sizeof(char) * sz3;
23467  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23468  if (temp_sz > 0){
23469  if (temp_sz > 2){
23470  if (temp_sz > 3){
23471  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23472  }
23473  typesz = sizeof(float) * params->buffers_size[0];
23474  typesz2 = sizeof(double) * params->buffers_size[1];
23475  typesz3 = sizeof(char) * params->buffers_size[2];
23476  }
23477  else{
23478  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23479  }
23480  }
23481  cl_int error = 0;
23482  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23483  checkError(error);
23484  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23485  checkError(error);
23486  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23487  checkError(error);
23488  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23489  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23490  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23491  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23492 
23493  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23494  checkError (error);
23495  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23496  checkError (error);
23497  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23498  checkError (error);
23499 
23500  size_t size[3] = {sz, sz2, sz3};
23501  size_t work_dimension = 3;
23502 
23503  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23504  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23505  work_dimension = 1;
23506  }
23507  else if(temp_sz > 0){
23508  if (params->multi_dimensional){
23509  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23510  For default multidimensional global work size, leave the global_work_size vector empty, \
23511  and set multi_dimensional to true. Setting the global work size based on the values inside \
23512  the global_work_size vector.");
23513  }
23514  if (temp_sz == 1){
23515  size[0] = params->global_work_size[0];
23516  work_dimension = 1;
23517  }
23518  else if (temp_sz == 2){
23519  size[0] = params->global_work_size[0];
23520  size[1] = params->global_work_size[1];
23521  work_dimension = 2;
23522  }
23523  else{
23524  size[0] = params->global_work_size[0];
23525  size[1] = params->global_work_size[1];
23526  size[2] = params->global_work_size[2];
23527  if (temp_sz > 3){
23528  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23529  }
23530  }
23531  }
23532 
23533  cl_event gpuExec;
23534 
23535  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23536 
23537  clWaitForEvents(1, &gpuExec);
23538 
23539  float *result = (float *) malloc(typesz);
23540  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23541 
23542  v->assign(result, result+sz);
23543 
23544  if (typesz2 != typesz or sz != sz2){
23545  double *result2;
23546  result2 = (double *) malloc(typesz2);
23547  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23548 
23549  v2->assign(result2, result2+sz2);
23550  free(result2);
23551  }
23552  else{
23553  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23554 
23555  v2->assign(result, result+sz2);
23556  }
23557 
23558  if (typesz3 != typesz or sz != sz3){
23559  char *result3;
23560  result3 = (char *) malloc(typesz3);
23561  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23562 
23563  v3->assign(result3, result3+sz3);
23564  free(result3);
23565  }
23566  else{
23567  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23568 
23569  v3->assign(result, result+sz3);
23570  }
23571 
23572  clReleaseCommandQueue (queue);
23573  clReleaseMemObject(buffer);
23574  clReleaseMemObject(buffer2);
23575  clReleaseMemObject(buffer3);
23576  clReleaseEvent(gpuExec);
23577  free(result);
23578  }
23579 
23580 
23581  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
23582  size_t sz = v.size();
23583  size_t sz2 = v2.size();
23584  size_t sz3 = v3.size();
23585  size_t typesz = sizeof(float) * sz;
23586  size_t typesz2 = sizeof(double) * sz2;
23587  size_t typesz3 = sizeof(int) * sz3;
23588  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23589  if (temp_sz > 0){
23590  if (temp_sz > 2){
23591  if (temp_sz > 3){
23592  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23593  }
23594  typesz = sizeof(float) * params->buffers_size[0];
23595  typesz2 = sizeof(double) * params->buffers_size[1];
23596  typesz3 = sizeof(int) * params->buffers_size[2];
23597  }
23598  else{
23599  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23600  }
23601  }
23602  cl_int error = 0;
23603  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23604  checkError(error);
23605  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23606  checkError(error);
23607  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23608  checkError(error);
23609  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23610  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23611  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23612  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23613 
23614  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
23615  checkError (error);
23616  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23617  checkError (error);
23618  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23619  checkError (error);
23620 
23621  size_t size[3] = {sz, sz2, sz3};
23622  size_t work_dimension = 3;
23623 
23624  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23625  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23626  work_dimension = 1;
23627  }
23628  else if(temp_sz > 0){
23629  if (params->multi_dimensional){
23630  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23631  For default multidimensional global work size, leave the global_work_size vector empty, \
23632  and set multi_dimensional to true. Setting the global work size based on the values inside \
23633  the global_work_size vector.");
23634  }
23635  if (temp_sz == 1){
23636  size[0] = params->global_work_size[0];
23637  work_dimension = 1;
23638  }
23639  else if (temp_sz == 2){
23640  size[0] = params->global_work_size[0];
23641  size[1] = params->global_work_size[1];
23642  work_dimension = 2;
23643  }
23644  else{
23645  size[0] = params->global_work_size[0];
23646  size[1] = params->global_work_size[1];
23647  size[2] = params->global_work_size[2];
23648  if (temp_sz > 3){
23649  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23650  }
23651  }
23652  }
23653 
23654  cl_event gpuExec;
23655 
23656  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23657 
23658  clWaitForEvents(1, &gpuExec);
23659 
23660  float *result = (float *) malloc(typesz);
23661  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23662 
23663  std::vector<float> res = std::vector<float>();
23664  res.assign(result, result+sz);
23665 
23666  clReleaseCommandQueue (queue);
23667  clReleaseMemObject(buffer);
23668  clReleaseMemObject(buffer2);
23669  clReleaseMemObject(buffer3);
23670  clReleaseEvent(gpuExec);
23671  free(result);
23672 
23673  return res;
23674  }
23675 
23676  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
23677  size_t sz = v->size();
23678  size_t sz2 = v2.size();
23679  size_t sz3 = v3.size();
23680  size_t typesz = sizeof(float) * sz;
23681  size_t typesz2 = sizeof(double) * sz2;
23682  size_t typesz3 = sizeof(int) * sz3;
23683  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23684  if (temp_sz > 0){
23685  if (temp_sz > 2){
23686  if (temp_sz > 3){
23687  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23688  }
23689  typesz = sizeof(float) * params->buffers_size[0];
23690  typesz2 = sizeof(double) * params->buffers_size[1];
23691  typesz3 = sizeof(int) * params->buffers_size[2];
23692  }
23693  else{
23694  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23695  }
23696  }
23697  cl_int error = 0;
23698  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23699  checkError(error);
23700  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23701  checkError(error);
23702  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23703  checkError(error);
23704  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23705  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23706  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23707  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23708 
23709  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23710  checkError (error);
23711  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23712  checkError (error);
23713  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23714  checkError (error);
23715 
23716  size_t size[3] = {sz, sz2, sz3};
23717  size_t work_dimension = 3;
23718 
23719  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23720  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23721  work_dimension = 1;
23722  }
23723  else if(temp_sz > 0){
23724  if (params->multi_dimensional){
23725  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23726  For default multidimensional global work size, leave the global_work_size vector empty, \
23727  and set multi_dimensional to true. Setting the global work size based on the values inside \
23728  the global_work_size vector.");
23729  }
23730  if (temp_sz == 1){
23731  size[0] = params->global_work_size[0];
23732  work_dimension = 1;
23733  }
23734  else if (temp_sz == 2){
23735  size[0] = params->global_work_size[0];
23736  size[1] = params->global_work_size[1];
23737  work_dimension = 2;
23738  }
23739  else{
23740  size[0] = params->global_work_size[0];
23741  size[1] = params->global_work_size[1];
23742  size[2] = params->global_work_size[2];
23743  if (temp_sz > 3){
23744  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23745  }
23746  }
23747  }
23748 
23749  cl_event gpuExec;
23750 
23751  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23752 
23753  clWaitForEvents(1, &gpuExec);
23754 
23755  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23756 
23757  clWaitForEvents(1, &gpuExec);
23758 
23759  float *result = (float *) malloc(typesz);
23760  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23761 
23762  v->assign(result, result+sz);
23763 
23764  clReleaseCommandQueue (queue);
23765  clReleaseMemObject(buffer);
23766  clReleaseMemObject(buffer2);
23767  clReleaseMemObject(buffer3);
23768  clReleaseEvent(gpuExec);
23769  free(result);
23770  }
23771 
23772  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
23773  size_t sz = v->size();
23774  size_t sz2 = v2->size();
23775  size_t sz3 = v3.size();
23776  size_t typesz = sizeof(float) * sz;
23777  size_t typesz2 = sizeof(double) * sz2;
23778  size_t typesz3 = sizeof(int) * sz3;
23779  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23780  if (temp_sz > 0){
23781  if (temp_sz > 2){
23782  if (temp_sz > 3){
23783  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23784  }
23785  typesz = sizeof(float) * params->buffers_size[0];
23786  typesz2 = sizeof(double) * params->buffers_size[1];
23787  typesz3 = sizeof(int) * params->buffers_size[2];
23788  }
23789  else{
23790  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23791  }
23792  }
23793  cl_int error = 0;
23794  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23795  checkError(error);
23796  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23797  checkError(error);
23798  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23799  checkError(error);
23800  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23801  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23802  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23803  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23804 
23805  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23806  checkError (error);
23807  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23808  checkError (error);
23809  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23810  checkError (error);
23811 
23812  size_t size[3] = {sz, sz2, sz3};
23813  size_t work_dimension = 3;
23814 
23815  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23816  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23817  work_dimension = 1;
23818  }
23819  else if(temp_sz > 0){
23820  if (params->multi_dimensional){
23821  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23822  For default multidimensional global work size, leave the global_work_size vector empty, \
23823  and set multi_dimensional to true. Setting the global work size based on the values inside \
23824  the global_work_size vector.");
23825  }
23826  if (temp_sz == 1){
23827  size[0] = params->global_work_size[0];
23828  work_dimension = 1;
23829  }
23830  else if (temp_sz == 2){
23831  size[0] = params->global_work_size[0];
23832  size[1] = params->global_work_size[1];
23833  work_dimension = 2;
23834  }
23835  else{
23836  size[0] = params->global_work_size[0];
23837  size[1] = params->global_work_size[1];
23838  size[2] = params->global_work_size[2];
23839  if (temp_sz > 3){
23840  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23841  }
23842  }
23843  }
23844 
23845  cl_event gpuExec;
23846 
23847  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23848 
23849  clWaitForEvents(1, &gpuExec);
23850 
23851  float *result = (float *) malloc(typesz);
23852  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23853 
23854  v->assign(result, result+sz);
23855 
23856  if (typesz2 != typesz or sz != sz2){
23857  double *result2;
23858  result2 = (double *) malloc(typesz2);
23859  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23860 
23861  v2->assign(result2, result2+sz2);
23862  free(result2);
23863  }
23864  else{
23865  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23866 
23867  v2->assign(result, result+sz2);
23868  }
23869 
23870  clReleaseCommandQueue (queue);
23871  clReleaseMemObject(buffer);
23872  clReleaseMemObject(buffer2);
23873  clReleaseMemObject(buffer3);
23874  clReleaseEvent(gpuExec);
23875  free(result);
23876  }
23877 
23878  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
23879  size_t sz = v->size();
23880  size_t sz2 = v2->size();
23881  size_t sz3 = v3->size();
23882  size_t typesz = sizeof(float) * sz;
23883  size_t typesz2 = sizeof(double) * sz2;
23884  size_t typesz3 = sizeof(int) * sz3;
23885  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
23886  if (temp_sz > 0){
23887  if (temp_sz > 2){
23888  if (temp_sz > 3){
23889  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23890  }
23891  typesz = sizeof(float) * params->buffers_size[0];
23892  typesz2 = sizeof(double) * params->buffers_size[1];
23893  typesz3 = sizeof(int) * params->buffers_size[2];
23894  }
23895  else{
23896  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23897  }
23898  }
23899  cl_int error = 0;
23900  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23901  checkError(error);
23902  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23903  checkError(error);
23904  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23905  checkError(error);
23906  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
23907  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
23908  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
23909  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
23910 
23911  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23912  checkError (error);
23913  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23914  checkError (error);
23915  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23916  checkError (error);
23917 
23918  size_t size[3] = {sz, sz2, sz3};
23919  size_t work_dimension = 3;
23920 
23921  temp_sz = params != NULL ? params->global_work_size.size() : 0;
23922  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
23923  work_dimension = 1;
23924  }
23925  else if(temp_sz > 0){
23926  if (params->multi_dimensional){
23927  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
23928  For default multidimensional global work size, leave the global_work_size vector empty, \
23929  and set multi_dimensional to true. Setting the global work size based on the values inside \
23930  the global_work_size vector.");
23931  }
23932  if (temp_sz == 1){
23933  size[0] = params->global_work_size[0];
23934  work_dimension = 1;
23935  }
23936  else if (temp_sz == 2){
23937  size[0] = params->global_work_size[0];
23938  size[1] = params->global_work_size[1];
23939  work_dimension = 2;
23940  }
23941  else{
23942  size[0] = params->global_work_size[0];
23943  size[1] = params->global_work_size[1];
23944  size[2] = params->global_work_size[2];
23945  if (temp_sz > 3){
23946  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23947  }
23948  }
23949  }
23950 
23951  cl_event gpuExec;
23952 
23953  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23954 
23955  clWaitForEvents(1, &gpuExec);
23956 
23957  float *result = (float *) malloc(typesz);
23958  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23959 
23960  v->assign(result, result+sz);
23961 
23962  if (typesz2 != typesz or sz != sz2){
23963  double *result2;
23964  result2 = (double *) malloc(typesz2);
23965  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23966 
23967  v2->assign(result2, result2+sz2);
23968  free(result2);
23969  }
23970  else{
23971  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23972 
23973  v2->assign(result, result+sz2);
23974  }
23975 
23976  if (typesz3 != typesz or sz != sz3){
23977  int *result3;
23978  result3 = (int *) malloc(typesz3);
23979  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23980 
23981  v3->assign(result3, result3+sz3);
23982  free(result3);
23983  }
23984  else{
23985  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23986 
23987  v3->assign(result, result+sz3);
23988  }
23989 
23990  clReleaseCommandQueue (queue);
23991  clReleaseMemObject(buffer);
23992  clReleaseMemObject(buffer2);
23993  clReleaseMemObject(buffer3);
23994  clReleaseEvent(gpuExec);
23995  free(result);
23996  }
23997 
23998 
23999  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
24000  size_t sz = v.size();
24001  size_t sz2 = v2.size();
24002  size_t sz3 = v3.size();
24003  size_t typesz = sizeof(float) * sz;
24004  size_t typesz2 = sizeof(double) * sz2;
24005  size_t typesz3 = sizeof(float) * sz3;
24006  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24007  if (temp_sz > 0){
24008  if (temp_sz > 2){
24009  if (temp_sz > 3){
24010  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24011  }
24012  typesz = sizeof(float) * params->buffers_size[0];
24013  typesz2 = sizeof(double) * params->buffers_size[1];
24014  typesz3 = sizeof(float) * params->buffers_size[2];
24015  }
24016  else{
24017  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24018  }
24019  }
24020  cl_int error = 0;
24021  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24022  checkError(error);
24023  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24024  checkError(error);
24025  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24026  checkError(error);
24027  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24028  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24029  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24030  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24031 
24032  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24033  checkError (error);
24034  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24035  checkError (error);
24036  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24037  checkError (error);
24038 
24039  size_t size[3] = {sz, sz2, sz3};
24040  size_t work_dimension = 3;
24041 
24042  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24043  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24044  work_dimension = 1;
24045  }
24046  else if(temp_sz > 0){
24047  if (params->multi_dimensional){
24048  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24049  For default multidimensional global work size, leave the global_work_size vector empty, \
24050  and set multi_dimensional to true. Setting the global work size based on the values inside \
24051  the global_work_size vector.");
24052  }
24053  if (temp_sz == 1){
24054  size[0] = params->global_work_size[0];
24055  work_dimension = 1;
24056  }
24057  else if (temp_sz == 2){
24058  size[0] = params->global_work_size[0];
24059  size[1] = params->global_work_size[1];
24060  work_dimension = 2;
24061  }
24062  else{
24063  size[0] = params->global_work_size[0];
24064  size[1] = params->global_work_size[1];
24065  size[2] = params->global_work_size[2];
24066  if (temp_sz > 3){
24067  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24068  }
24069  }
24070  }
24071 
24072  cl_event gpuExec;
24073 
24074  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24075 
24076  clWaitForEvents(1, &gpuExec);
24077 
24078  float *result = (float *) malloc(typesz);
24079  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24080 
24081  std::vector<float> res = std::vector<float>();
24082  res.assign(result, result+sz);
24083 
24084  clReleaseCommandQueue (queue);
24085  clReleaseMemObject(buffer);
24086  clReleaseMemObject(buffer2);
24087  clReleaseMemObject(buffer3);
24088  clReleaseEvent(gpuExec);
24089  free(result);
24090 
24091  return res;
24092  }
24093 
24094  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
24095  size_t sz = v->size();
24096  size_t sz2 = v2.size();
24097  size_t sz3 = v3.size();
24098  size_t typesz = sizeof(float) * sz;
24099  size_t typesz2 = sizeof(double) * sz2;
24100  size_t typesz3 = sizeof(float) * sz3;
24101  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24102  if (temp_sz > 0){
24103  if (temp_sz > 2){
24104  if (temp_sz > 3){
24105  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24106  }
24107  typesz = sizeof(float) * params->buffers_size[0];
24108  typesz2 = sizeof(double) * params->buffers_size[1];
24109  typesz3 = sizeof(float) * params->buffers_size[2];
24110  }
24111  else{
24112  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24113  }
24114  }
24115  cl_int error = 0;
24116  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24117  checkError(error);
24118  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24119  checkError(error);
24120  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24121  checkError(error);
24122  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24123  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24124  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24125  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24126 
24127  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24128  checkError (error);
24129  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24130  checkError (error);
24131  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24132  checkError (error);
24133 
24134  size_t size[3] = {sz, sz2, sz3};
24135  size_t work_dimension = 3;
24136 
24137  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24138  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24139  work_dimension = 1;
24140  }
24141  else if(temp_sz > 0){
24142  if (params->multi_dimensional){
24143  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24144  For default multidimensional global work size, leave the global_work_size vector empty, \
24145  and set multi_dimensional to true. Setting the global work size based on the values inside \
24146  the global_work_size vector.");
24147  }
24148  if (temp_sz == 1){
24149  size[0] = params->global_work_size[0];
24150  work_dimension = 1;
24151  }
24152  else if (temp_sz == 2){
24153  size[0] = params->global_work_size[0];
24154  size[1] = params->global_work_size[1];
24155  work_dimension = 2;
24156  }
24157  else{
24158  size[0] = params->global_work_size[0];
24159  size[1] = params->global_work_size[1];
24160  size[2] = params->global_work_size[2];
24161  if (temp_sz > 3){
24162  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24163  }
24164  }
24165  }
24166 
24167  cl_event gpuExec;
24168 
24169  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24170 
24171  clWaitForEvents(1, &gpuExec);
24172 
24173  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24174 
24175  clWaitForEvents(1, &gpuExec);
24176 
24177  float *result = (float *) malloc(typesz);
24178  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24179 
24180  v->assign(result, result+sz);
24181 
24182  clReleaseCommandQueue (queue);
24183  clReleaseMemObject(buffer);
24184  clReleaseMemObject(buffer2);
24185  clReleaseMemObject(buffer3);
24186  clReleaseEvent(gpuExec);
24187  free(result);
24188  }
24189 
24190  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
24191  size_t sz = v->size();
24192  size_t sz2 = v2->size();
24193  size_t sz3 = v3.size();
24194  size_t typesz = sizeof(float) * sz;
24195  size_t typesz2 = sizeof(double) * sz2;
24196  size_t typesz3 = sizeof(float) * sz3;
24197  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24198  if (temp_sz > 0){
24199  if (temp_sz > 2){
24200  if (temp_sz > 3){
24201  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24202  }
24203  typesz = sizeof(float) * params->buffers_size[0];
24204  typesz2 = sizeof(double) * params->buffers_size[1];
24205  typesz3 = sizeof(float) * params->buffers_size[2];
24206  }
24207  else{
24208  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24209  }
24210  }
24211  cl_int error = 0;
24212  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24213  checkError(error);
24214  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24215  checkError(error);
24216  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24217  checkError(error);
24218  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24219  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24220  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24221  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24222 
24223  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24224  checkError (error);
24225  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24226  checkError (error);
24227  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24228  checkError (error);
24229 
24230  size_t size[3] = {sz, sz2, sz3};
24231  size_t work_dimension = 3;
24232 
24233  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24234  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24235  work_dimension = 1;
24236  }
24237  else if(temp_sz > 0){
24238  if (params->multi_dimensional){
24239  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24240  For default multidimensional global work size, leave the global_work_size vector empty, \
24241  and set multi_dimensional to true. Setting the global work size based on the values inside \
24242  the global_work_size vector.");
24243  }
24244  if (temp_sz == 1){
24245  size[0] = params->global_work_size[0];
24246  work_dimension = 1;
24247  }
24248  else if (temp_sz == 2){
24249  size[0] = params->global_work_size[0];
24250  size[1] = params->global_work_size[1];
24251  work_dimension = 2;
24252  }
24253  else{
24254  size[0] = params->global_work_size[0];
24255  size[1] = params->global_work_size[1];
24256  size[2] = params->global_work_size[2];
24257  if (temp_sz > 3){
24258  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24259  }
24260  }
24261  }
24262 
24263  cl_event gpuExec;
24264 
24265  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24266 
24267  clWaitForEvents(1, &gpuExec);
24268 
24269  float *result = (float *) malloc(typesz);
24270  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24271 
24272  v->assign(result, result+sz);
24273 
24274  if (typesz2 != typesz or sz != sz2){
24275  double *result2;
24276  result2 = (double *) malloc(typesz2);
24277  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24278 
24279  v2->assign(result2, result2+sz2);
24280  free(result2);
24281  }
24282  else{
24283  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24284 
24285  v2->assign(result, result+sz2);
24286  }
24287 
24288  clReleaseCommandQueue (queue);
24289  clReleaseMemObject(buffer);
24290  clReleaseMemObject(buffer2);
24291  clReleaseMemObject(buffer3);
24292  clReleaseEvent(gpuExec);
24293  free(result);
24294  }
24295 
24296  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
24297  size_t sz = v->size();
24298  size_t sz2 = v2->size();
24299  size_t sz3 = v3->size();
24300  size_t typesz = sizeof(float) * sz;
24301  size_t typesz2 = sizeof(double) * sz2;
24302  size_t typesz3 = sizeof(float) * sz3;
24303  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24304  if (temp_sz > 0){
24305  if (temp_sz > 2){
24306  if (temp_sz > 3){
24307  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24308  }
24309  typesz = sizeof(float) * params->buffers_size[0];
24310  typesz2 = sizeof(double) * params->buffers_size[1];
24311  typesz3 = sizeof(float) * params->buffers_size[2];
24312  }
24313  else{
24314  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24315  }
24316  }
24317  cl_int error = 0;
24318  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24319  checkError(error);
24320  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24321  checkError(error);
24322  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
24323  checkError(error);
24324  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24325  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24326  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24327  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24328 
24329  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24330  checkError (error);
24331  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24332  checkError (error);
24333  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
24334  checkError (error);
24335 
24336  size_t size[3] = {sz, sz2, sz3};
24337  size_t work_dimension = 3;
24338 
24339  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24340  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24341  work_dimension = 1;
24342  }
24343  else if(temp_sz > 0){
24344  if (params->multi_dimensional){
24345  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24346  For default multidimensional global work size, leave the global_work_size vector empty, \
24347  and set multi_dimensional to true. Setting the global work size based on the values inside \
24348  the global_work_size vector.");
24349  }
24350  if (temp_sz == 1){
24351  size[0] = params->global_work_size[0];
24352  work_dimension = 1;
24353  }
24354  else if (temp_sz == 2){
24355  size[0] = params->global_work_size[0];
24356  size[1] = params->global_work_size[1];
24357  work_dimension = 2;
24358  }
24359  else{
24360  size[0] = params->global_work_size[0];
24361  size[1] = params->global_work_size[1];
24362  size[2] = params->global_work_size[2];
24363  if (temp_sz > 3){
24364  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24365  }
24366  }
24367  }
24368 
24369  cl_event gpuExec;
24370 
24371  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24372 
24373  clWaitForEvents(1, &gpuExec);
24374 
24375  float *result = (float *) malloc(typesz);
24376  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24377 
24378  v->assign(result, result+sz);
24379 
24380  if (typesz2 != typesz or sz != sz2){
24381  double *result2;
24382  result2 = (double *) malloc(typesz2);
24383  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24384 
24385  v2->assign(result2, result2+sz2);
24386  free(result2);
24387  }
24388  else{
24389  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24390 
24391  v2->assign(result, result+sz2);
24392  }
24393 
24394  if (typesz3 != typesz or sz != sz3){
24395  float *result3;
24396  result3 = (float *) malloc(typesz3);
24397  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
24398 
24399  v3->assign(result3, result3+sz3);
24400  free(result3);
24401  }
24402  else{
24403  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24404 
24405  v3->assign(result, result+sz3);
24406  }
24407 
24408  clReleaseCommandQueue (queue);
24409  clReleaseMemObject(buffer);
24410  clReleaseMemObject(buffer2);
24411  clReleaseMemObject(buffer3);
24412  clReleaseEvent(gpuExec);
24413  free(result);
24414  }
24415 
24416 
24417  std::vector<float> ROS_OpenCL::process(const std::vector<float> v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
24418  size_t sz = v.size();
24419  size_t sz2 = v2.size();
24420  size_t sz3 = v3.size();
24421  size_t typesz = sizeof(float) * sz;
24422  size_t typesz2 = sizeof(double) * sz2;
24423  size_t typesz3 = sizeof(double) * sz3;
24424  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24425  if (temp_sz > 0){
24426  if (temp_sz > 2){
24427  if (temp_sz > 3){
24428  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24429  }
24430  typesz = sizeof(float) * params->buffers_size[0];
24431  typesz2 = sizeof(double) * params->buffers_size[1];
24432  typesz3 = sizeof(double) * params->buffers_size[2];
24433  }
24434  else{
24435  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24436  }
24437  }
24438  cl_int error = 0;
24439  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24440  checkError(error);
24441  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24442  checkError(error);
24443  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24444  checkError(error);
24445  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24446  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24447  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24448  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24449 
24450  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24451  checkError (error);
24452  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24453  checkError (error);
24454  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24455  checkError (error);
24456 
24457  size_t size[3] = {sz, sz2, sz3};
24458  size_t work_dimension = 3;
24459 
24460  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24461  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24462  work_dimension = 1;
24463  }
24464  else if(temp_sz > 0){
24465  if (params->multi_dimensional){
24466  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24467  For default multidimensional global work size, leave the global_work_size vector empty, \
24468  and set multi_dimensional to true. Setting the global work size based on the values inside \
24469  the global_work_size vector.");
24470  }
24471  if (temp_sz == 1){
24472  size[0] = params->global_work_size[0];
24473  work_dimension = 1;
24474  }
24475  else if (temp_sz == 2){
24476  size[0] = params->global_work_size[0];
24477  size[1] = params->global_work_size[1];
24478  work_dimension = 2;
24479  }
24480  else{
24481  size[0] = params->global_work_size[0];
24482  size[1] = params->global_work_size[1];
24483  size[2] = params->global_work_size[2];
24484  if (temp_sz > 3){
24485  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24486  }
24487  }
24488  }
24489 
24490  cl_event gpuExec;
24491 
24492  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24493 
24494  clWaitForEvents(1, &gpuExec);
24495 
24496  float *result = (float *) malloc(typesz);
24497  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24498 
24499  std::vector<float> res = std::vector<float>();
24500  res.assign(result, result+sz);
24501 
24502  clReleaseCommandQueue (queue);
24503  clReleaseMemObject(buffer);
24504  clReleaseMemObject(buffer2);
24505  clReleaseMemObject(buffer3);
24506  clReleaseEvent(gpuExec);
24507  free(result);
24508 
24509  return res;
24510  }
24511 
24512  void ROS_OpenCL::process(std::vector<float>* v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
24513  size_t sz = v->size();
24514  size_t sz2 = v2.size();
24515  size_t sz3 = v3.size();
24516  size_t typesz = sizeof(float) * sz;
24517  size_t typesz2 = sizeof(double) * sz2;
24518  size_t typesz3 = sizeof(double) * sz3;
24519  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24520  if (temp_sz > 0){
24521  if (temp_sz > 2){
24522  if (temp_sz > 3){
24523  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24524  }
24525  typesz = sizeof(float) * params->buffers_size[0];
24526  typesz2 = sizeof(double) * params->buffers_size[1];
24527  typesz3 = sizeof(double) * params->buffers_size[2];
24528  }
24529  else{
24530  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24531  }
24532  }
24533  cl_int error = 0;
24534  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24535  checkError(error);
24536  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24537  checkError(error);
24538  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24539  checkError(error);
24540  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24541  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24542  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24543  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24544 
24545  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24546  checkError (error);
24547  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24548  checkError (error);
24549  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24550  checkError (error);
24551 
24552  size_t size[3] = {sz, sz2, sz3};
24553  size_t work_dimension = 3;
24554 
24555  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24556  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24557  work_dimension = 1;
24558  }
24559  else if(temp_sz > 0){
24560  if (params->multi_dimensional){
24561  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24562  For default multidimensional global work size, leave the global_work_size vector empty, \
24563  and set multi_dimensional to true. Setting the global work size based on the values inside \
24564  the global_work_size vector.");
24565  }
24566  if (temp_sz == 1){
24567  size[0] = params->global_work_size[0];
24568  work_dimension = 1;
24569  }
24570  else if (temp_sz == 2){
24571  size[0] = params->global_work_size[0];
24572  size[1] = params->global_work_size[1];
24573  work_dimension = 2;
24574  }
24575  else{
24576  size[0] = params->global_work_size[0];
24577  size[1] = params->global_work_size[1];
24578  size[2] = params->global_work_size[2];
24579  if (temp_sz > 3){
24580  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24581  }
24582  }
24583  }
24584 
24585  cl_event gpuExec;
24586 
24587  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24588 
24589  clWaitForEvents(1, &gpuExec);
24590 
24591  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24592 
24593  clWaitForEvents(1, &gpuExec);
24594 
24595  float *result = (float *) malloc(typesz);
24596  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24597 
24598  v->assign(result, result+sz);
24599 
24600  clReleaseCommandQueue (queue);
24601  clReleaseMemObject(buffer);
24602  clReleaseMemObject(buffer2);
24603  clReleaseMemObject(buffer3);
24604  clReleaseEvent(gpuExec);
24605  free(result);
24606  }
24607 
24608  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
24609  size_t sz = v->size();
24610  size_t sz2 = v2->size();
24611  size_t sz3 = v3.size();
24612  size_t typesz = sizeof(float) * sz;
24613  size_t typesz2 = sizeof(double) * sz2;
24614  size_t typesz3 = sizeof(double) * sz3;
24615  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24616  if (temp_sz > 0){
24617  if (temp_sz > 2){
24618  if (temp_sz > 3){
24619  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24620  }
24621  typesz = sizeof(float) * params->buffers_size[0];
24622  typesz2 = sizeof(double) * params->buffers_size[1];
24623  typesz3 = sizeof(double) * params->buffers_size[2];
24624  }
24625  else{
24626  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24627  }
24628  }
24629  cl_int error = 0;
24630  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24631  checkError(error);
24632  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24633  checkError(error);
24634  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24635  checkError(error);
24636  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24637  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24638  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24639  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24640 
24641  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24642  checkError (error);
24643  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24644  checkError (error);
24645  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24646  checkError (error);
24647 
24648  size_t size[3] = {sz, sz2, sz3};
24649  size_t work_dimension = 3;
24650 
24651  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24652  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24653  work_dimension = 1;
24654  }
24655  else if(temp_sz > 0){
24656  if (params->multi_dimensional){
24657  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24658  For default multidimensional global work size, leave the global_work_size vector empty, \
24659  and set multi_dimensional to true. Setting the global work size based on the values inside \
24660  the global_work_size vector.");
24661  }
24662  if (temp_sz == 1){
24663  size[0] = params->global_work_size[0];
24664  work_dimension = 1;
24665  }
24666  else if (temp_sz == 2){
24667  size[0] = params->global_work_size[0];
24668  size[1] = params->global_work_size[1];
24669  work_dimension = 2;
24670  }
24671  else{
24672  size[0] = params->global_work_size[0];
24673  size[1] = params->global_work_size[1];
24674  size[2] = params->global_work_size[2];
24675  if (temp_sz > 3){
24676  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24677  }
24678  }
24679  }
24680 
24681  cl_event gpuExec;
24682 
24683  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24684 
24685  clWaitForEvents(1, &gpuExec);
24686 
24687  float *result = (float *) malloc(typesz);
24688  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24689 
24690  v->assign(result, result+sz);
24691 
24692  if (typesz2 != typesz or sz != sz2){
24693  double *result2;
24694  result2 = (double *) malloc(typesz2);
24695  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24696 
24697  v2->assign(result2, result2+sz2);
24698  free(result2);
24699  }
24700  else{
24701  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24702 
24703  v2->assign(result, result+sz2);
24704  }
24705 
24706  clReleaseCommandQueue (queue);
24707  clReleaseMemObject(buffer);
24708  clReleaseMemObject(buffer2);
24709  clReleaseMemObject(buffer3);
24710  clReleaseEvent(gpuExec);
24711  free(result);
24712  }
24713 
24714  void ROS_OpenCL::process(std::vector<float>* v, std::vector<double>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
24715  size_t sz = v->size();
24716  size_t sz2 = v2->size();
24717  size_t sz3 = v3->size();
24718  size_t typesz = sizeof(float) * sz;
24719  size_t typesz2 = sizeof(double) * sz2;
24720  size_t typesz3 = sizeof(double) * sz3;
24721  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24722  if (temp_sz > 0){
24723  if (temp_sz > 2){
24724  if (temp_sz > 3){
24725  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24726  }
24727  typesz = sizeof(float) * params->buffers_size[0];
24728  typesz2 = sizeof(double) * params->buffers_size[1];
24729  typesz3 = sizeof(double) * params->buffers_size[2];
24730  }
24731  else{
24732  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24733  }
24734  }
24735  cl_int error = 0;
24736  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24737  checkError(error);
24738  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24739  checkError(error);
24740  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
24741  checkError(error);
24742  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24743  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24744  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24745  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24746 
24747  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24748  checkError (error);
24749  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24750  checkError (error);
24751  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
24752  checkError (error);
24753 
24754  size_t size[3] = {sz, sz2, sz3};
24755  size_t work_dimension = 3;
24756 
24757  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24758  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24759  work_dimension = 1;
24760  }
24761  else if(temp_sz > 0){
24762  if (params->multi_dimensional){
24763  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24764  For default multidimensional global work size, leave the global_work_size vector empty, \
24765  and set multi_dimensional to true. Setting the global work size based on the values inside \
24766  the global_work_size vector.");
24767  }
24768  if (temp_sz == 1){
24769  size[0] = params->global_work_size[0];
24770  work_dimension = 1;
24771  }
24772  else if (temp_sz == 2){
24773  size[0] = params->global_work_size[0];
24774  size[1] = params->global_work_size[1];
24775  work_dimension = 2;
24776  }
24777  else{
24778  size[0] = params->global_work_size[0];
24779  size[1] = params->global_work_size[1];
24780  size[2] = params->global_work_size[2];
24781  if (temp_sz > 3){
24782  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24783  }
24784  }
24785  }
24786 
24787  cl_event gpuExec;
24788 
24789  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24790 
24791  clWaitForEvents(1, &gpuExec);
24792 
24793  float *result = (float *) malloc(typesz);
24794  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24795 
24796  v->assign(result, result+sz);
24797 
24798  if (typesz2 != typesz or sz != sz2){
24799  double *result2;
24800  result2 = (double *) malloc(typesz2);
24801  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24802 
24803  v2->assign(result2, result2+sz2);
24804  free(result2);
24805  }
24806  else{
24807  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24808 
24809  v2->assign(result, result+sz2);
24810  }
24811 
24812  if (typesz3 != typesz or sz != sz3){
24813  double *result3;
24814  result3 = (double *) malloc(typesz3);
24815  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
24816 
24817  v3->assign(result3, result3+sz3);
24818  free(result3);
24819  }
24820  else{
24821  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24822 
24823  v3->assign(result, result+sz3);
24824  }
24825 
24826  clReleaseCommandQueue (queue);
24827  clReleaseMemObject(buffer);
24828  clReleaseMemObject(buffer2);
24829  clReleaseMemObject(buffer3);
24830  clReleaseEvent(gpuExec);
24831  free(result);
24832  }
24833 
24834 
24835  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
24836  size_t sz = v.size();
24837  size_t sz2 = v2.size();
24838  size_t sz3 = v3.size();
24839  size_t typesz = sizeof(double) * sz;
24840  size_t typesz2 = sizeof(char) * sz2;
24841  size_t typesz3 = sizeof(char) * sz3;
24842  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24843  if (temp_sz > 0){
24844  if (temp_sz > 2){
24845  if (temp_sz > 3){
24846  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24847  }
24848  typesz = sizeof(double) * params->buffers_size[0];
24849  typesz2 = sizeof(char) * params->buffers_size[1];
24850  typesz3 = sizeof(char) * params->buffers_size[2];
24851  }
24852  else{
24853  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24854  }
24855  }
24856  cl_int error = 0;
24857  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24858  checkError(error);
24859  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24860  checkError(error);
24861  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24862  checkError(error);
24863  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24864  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24865  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24866  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24867 
24868  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24869  checkError (error);
24870  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24871  checkError (error);
24872  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24873  checkError (error);
24874 
24875  size_t size[3] = {sz, sz2, sz3};
24876  size_t work_dimension = 3;
24877 
24878  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24879  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24880  work_dimension = 1;
24881  }
24882  else if(temp_sz > 0){
24883  if (params->multi_dimensional){
24884  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24885  For default multidimensional global work size, leave the global_work_size vector empty, \
24886  and set multi_dimensional to true. Setting the global work size based on the values inside \
24887  the global_work_size vector.");
24888  }
24889  if (temp_sz == 1){
24890  size[0] = params->global_work_size[0];
24891  work_dimension = 1;
24892  }
24893  else if (temp_sz == 2){
24894  size[0] = params->global_work_size[0];
24895  size[1] = params->global_work_size[1];
24896  work_dimension = 2;
24897  }
24898  else{
24899  size[0] = params->global_work_size[0];
24900  size[1] = params->global_work_size[1];
24901  size[2] = params->global_work_size[2];
24902  if (temp_sz > 3){
24903  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24904  }
24905  }
24906  }
24907 
24908  cl_event gpuExec;
24909 
24910  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24911 
24912  clWaitForEvents(1, &gpuExec);
24913 
24914  double *result = (double *) malloc(typesz);
24915  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24916 
24917  std::vector<double> res = std::vector<double>();
24918  res.assign(result, result+sz);
24919 
24920  clReleaseCommandQueue (queue);
24921  clReleaseMemObject(buffer);
24922  clReleaseMemObject(buffer2);
24923  clReleaseMemObject(buffer3);
24924  clReleaseEvent(gpuExec);
24925  free(result);
24926 
24927  return res;
24928  }
24929 
24930  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<char> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
24931  size_t sz = v->size();
24932  size_t sz2 = v2.size();
24933  size_t sz3 = v3.size();
24934  size_t typesz = sizeof(double) * sz;
24935  size_t typesz2 = sizeof(char) * sz2;
24936  size_t typesz3 = sizeof(char) * sz3;
24937  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
24938  if (temp_sz > 0){
24939  if (temp_sz > 2){
24940  if (temp_sz > 3){
24941  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24942  }
24943  typesz = sizeof(double) * params->buffers_size[0];
24944  typesz2 = sizeof(char) * params->buffers_size[1];
24945  typesz3 = sizeof(char) * params->buffers_size[2];
24946  }
24947  else{
24948  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24949  }
24950  }
24951  cl_int error = 0;
24952  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24953  checkError(error);
24954  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24955  checkError(error);
24956  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24957  checkError(error);
24958  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
24959  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
24960  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
24961  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
24962 
24963  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24964  checkError (error);
24965  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24966  checkError (error);
24967  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24968  checkError (error);
24969 
24970  size_t size[3] = {sz, sz2, sz3};
24971  size_t work_dimension = 3;
24972 
24973  temp_sz = params != NULL ? params->global_work_size.size() : 0;
24974  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
24975  work_dimension = 1;
24976  }
24977  else if(temp_sz > 0){
24978  if (params->multi_dimensional){
24979  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
24980  For default multidimensional global work size, leave the global_work_size vector empty, \
24981  and set multi_dimensional to true. Setting the global work size based on the values inside \
24982  the global_work_size vector.");
24983  }
24984  if (temp_sz == 1){
24985  size[0] = params->global_work_size[0];
24986  work_dimension = 1;
24987  }
24988  else if (temp_sz == 2){
24989  size[0] = params->global_work_size[0];
24990  size[1] = params->global_work_size[1];
24991  work_dimension = 2;
24992  }
24993  else{
24994  size[0] = params->global_work_size[0];
24995  size[1] = params->global_work_size[1];
24996  size[2] = params->global_work_size[2];
24997  if (temp_sz > 3){
24998  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24999  }
25000  }
25001  }
25002 
25003  cl_event gpuExec;
25004 
25005  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25006 
25007  clWaitForEvents(1, &gpuExec);
25008 
25009  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25010 
25011  clWaitForEvents(1, &gpuExec);
25012 
25013  double *result = (double *) malloc(typesz);
25014  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25015 
25016  v->assign(result, result+sz);
25017 
25018  clReleaseCommandQueue (queue);
25019  clReleaseMemObject(buffer);
25020  clReleaseMemObject(buffer2);
25021  clReleaseMemObject(buffer3);
25022  clReleaseEvent(gpuExec);
25023  free(result);
25024  }
25025 
25026  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
25027  size_t sz = v->size();
25028  size_t sz2 = v2->size();
25029  size_t sz3 = v3.size();
25030  size_t typesz = sizeof(double) * sz;
25031  size_t typesz2 = sizeof(char) * sz2;
25032  size_t typesz3 = sizeof(char) * sz3;
25033  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25034  if (temp_sz > 0){
25035  if (temp_sz > 2){
25036  if (temp_sz > 3){
25037  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25038  }
25039  typesz = sizeof(double) * params->buffers_size[0];
25040  typesz2 = sizeof(char) * params->buffers_size[1];
25041  typesz3 = sizeof(char) * params->buffers_size[2];
25042  }
25043  else{
25044  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25045  }
25046  }
25047  cl_int error = 0;
25048  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25049  checkError(error);
25050  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25051  checkError(error);
25052  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25053  checkError(error);
25054  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25055  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25056  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25057  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25058 
25059  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25060  checkError (error);
25061  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25062  checkError (error);
25063  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25064  checkError (error);
25065 
25066  size_t size[3] = {sz, sz2, sz3};
25067  size_t work_dimension = 3;
25068 
25069  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25070  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25071  work_dimension = 1;
25072  }
25073  else if(temp_sz > 0){
25074  if (params->multi_dimensional){
25075  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25076  For default multidimensional global work size, leave the global_work_size vector empty, \
25077  and set multi_dimensional to true. Setting the global work size based on the values inside \
25078  the global_work_size vector.");
25079  }
25080  if (temp_sz == 1){
25081  size[0] = params->global_work_size[0];
25082  work_dimension = 1;
25083  }
25084  else if (temp_sz == 2){
25085  size[0] = params->global_work_size[0];
25086  size[1] = params->global_work_size[1];
25087  work_dimension = 2;
25088  }
25089  else{
25090  size[0] = params->global_work_size[0];
25091  size[1] = params->global_work_size[1];
25092  size[2] = params->global_work_size[2];
25093  if (temp_sz > 3){
25094  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25095  }
25096  }
25097  }
25098 
25099  cl_event gpuExec;
25100 
25101  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25102 
25103  clWaitForEvents(1, &gpuExec);
25104 
25105  double *result = (double *) malloc(typesz);
25106  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25107 
25108  v->assign(result, result+sz);
25109 
25110  if (typesz2 != typesz or sz != sz2){
25111  char *result2;
25112  result2 = (char *) malloc(typesz2);
25113  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25114 
25115  v2->assign(result2, result2+sz2);
25116  free(result2);
25117  }
25118  else{
25119  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25120 
25121  v2->assign(result, result+sz2);
25122  }
25123 
25124  clReleaseCommandQueue (queue);
25125  clReleaseMemObject(buffer);
25126  clReleaseMemObject(buffer2);
25127  clReleaseMemObject(buffer3);
25128  clReleaseEvent(gpuExec);
25129  free(result);
25130  }
25131 
25132  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
25133  size_t sz = v->size();
25134  size_t sz2 = v2->size();
25135  size_t sz3 = v3->size();
25136  size_t typesz = sizeof(double) * sz;
25137  size_t typesz2 = sizeof(char) * sz2;
25138  size_t typesz3 = sizeof(char) * sz3;
25139  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25140  if (temp_sz > 0){
25141  if (temp_sz > 2){
25142  if (temp_sz > 3){
25143  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25144  }
25145  typesz = sizeof(double) * params->buffers_size[0];
25146  typesz2 = sizeof(char) * params->buffers_size[1];
25147  typesz3 = sizeof(char) * params->buffers_size[2];
25148  }
25149  else{
25150  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25151  }
25152  }
25153  cl_int error = 0;
25154  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25155  checkError(error);
25156  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25157  checkError(error);
25158  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25159  checkError(error);
25160  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25161  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25162  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25163  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25164 
25165  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25166  checkError (error);
25167  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25168  checkError (error);
25169  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
25170  checkError (error);
25171 
25172  size_t size[3] = {sz, sz2, sz3};
25173  size_t work_dimension = 3;
25174 
25175  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25176  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25177  work_dimension = 1;
25178  }
25179  else if(temp_sz > 0){
25180  if (params->multi_dimensional){
25181  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25182  For default multidimensional global work size, leave the global_work_size vector empty, \
25183  and set multi_dimensional to true. Setting the global work size based on the values inside \
25184  the global_work_size vector.");
25185  }
25186  if (temp_sz == 1){
25187  size[0] = params->global_work_size[0];
25188  work_dimension = 1;
25189  }
25190  else if (temp_sz == 2){
25191  size[0] = params->global_work_size[0];
25192  size[1] = params->global_work_size[1];
25193  work_dimension = 2;
25194  }
25195  else{
25196  size[0] = params->global_work_size[0];
25197  size[1] = params->global_work_size[1];
25198  size[2] = params->global_work_size[2];
25199  if (temp_sz > 3){
25200  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25201  }
25202  }
25203  }
25204 
25205  cl_event gpuExec;
25206 
25207  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25208 
25209  clWaitForEvents(1, &gpuExec);
25210 
25211  double *result = (double *) malloc(typesz);
25212  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25213 
25214  v->assign(result, result+sz);
25215 
25216  if (typesz2 != typesz or sz != sz2){
25217  char *result2;
25218  result2 = (char *) malloc(typesz2);
25219  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25220 
25221  v2->assign(result2, result2+sz2);
25222  free(result2);
25223  }
25224  else{
25225  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25226 
25227  v2->assign(result, result+sz2);
25228  }
25229 
25230  if (typesz3 != typesz or sz != sz3){
25231  char *result3;
25232  result3 = (char *) malloc(typesz3);
25233  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
25234 
25235  v3->assign(result3, result3+sz3);
25236  free(result3);
25237  }
25238  else{
25239  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25240 
25241  v3->assign(result, result+sz3);
25242  }
25243 
25244  clReleaseCommandQueue (queue);
25245  clReleaseMemObject(buffer);
25246  clReleaseMemObject(buffer2);
25247  clReleaseMemObject(buffer3);
25248  clReleaseEvent(gpuExec);
25249  free(result);
25250  }
25251 
25252 
25253  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
25254  size_t sz = v.size();
25255  size_t sz2 = v2.size();
25256  size_t sz3 = v3.size();
25257  size_t typesz = sizeof(double) * sz;
25258  size_t typesz2 = sizeof(char) * sz2;
25259  size_t typesz3 = sizeof(int) * sz3;
25260  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25261  if (temp_sz > 0){
25262  if (temp_sz > 2){
25263  if (temp_sz > 3){
25264  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25265  }
25266  typesz = sizeof(double) * params->buffers_size[0];
25267  typesz2 = sizeof(char) * params->buffers_size[1];
25268  typesz3 = sizeof(int) * params->buffers_size[2];
25269  }
25270  else{
25271  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25272  }
25273  }
25274  cl_int error = 0;
25275  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25276  checkError(error);
25277  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25278  checkError(error);
25279  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25280  checkError(error);
25281  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25282  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25283  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25284  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25285 
25286  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
25287  checkError (error);
25288  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25289  checkError (error);
25290  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25291  checkError (error);
25292 
25293  size_t size[3] = {sz, sz2, sz3};
25294  size_t work_dimension = 3;
25295 
25296  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25297  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25298  work_dimension = 1;
25299  }
25300  else if(temp_sz > 0){
25301  if (params->multi_dimensional){
25302  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25303  For default multidimensional global work size, leave the global_work_size vector empty, \
25304  and set multi_dimensional to true. Setting the global work size based on the values inside \
25305  the global_work_size vector.");
25306  }
25307  if (temp_sz == 1){
25308  size[0] = params->global_work_size[0];
25309  work_dimension = 1;
25310  }
25311  else if (temp_sz == 2){
25312  size[0] = params->global_work_size[0];
25313  size[1] = params->global_work_size[1];
25314  work_dimension = 2;
25315  }
25316  else{
25317  size[0] = params->global_work_size[0];
25318  size[1] = params->global_work_size[1];
25319  size[2] = params->global_work_size[2];
25320  if (temp_sz > 3){
25321  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25322  }
25323  }
25324  }
25325 
25326  cl_event gpuExec;
25327 
25328  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25329 
25330  clWaitForEvents(1, &gpuExec);
25331 
25332  double *result = (double *) malloc(typesz);
25333  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25334 
25335  std::vector<double> res = std::vector<double>();
25336  res.assign(result, result+sz);
25337 
25338  clReleaseCommandQueue (queue);
25339  clReleaseMemObject(buffer);
25340  clReleaseMemObject(buffer2);
25341  clReleaseMemObject(buffer3);
25342  clReleaseEvent(gpuExec);
25343  free(result);
25344 
25345  return res;
25346  }
25347 
25348  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<char> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
25349  size_t sz = v->size();
25350  size_t sz2 = v2.size();
25351  size_t sz3 = v3.size();
25352  size_t typesz = sizeof(double) * sz;
25353  size_t typesz2 = sizeof(char) * sz2;
25354  size_t typesz3 = sizeof(int) * sz3;
25355  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25356  if (temp_sz > 0){
25357  if (temp_sz > 2){
25358  if (temp_sz > 3){
25359  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25360  }
25361  typesz = sizeof(double) * params->buffers_size[0];
25362  typesz2 = sizeof(char) * params->buffers_size[1];
25363  typesz3 = sizeof(int) * params->buffers_size[2];
25364  }
25365  else{
25366  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25367  }
25368  }
25369  cl_int error = 0;
25370  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25371  checkError(error);
25372  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25373  checkError(error);
25374  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25375  checkError(error);
25376  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25377  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25378  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25379  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25380 
25381  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25382  checkError (error);
25383  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25384  checkError (error);
25385  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25386  checkError (error);
25387 
25388  size_t size[3] = {sz, sz2, sz3};
25389  size_t work_dimension = 3;
25390 
25391  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25392  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25393  work_dimension = 1;
25394  }
25395  else if(temp_sz > 0){
25396  if (params->multi_dimensional){
25397  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25398  For default multidimensional global work size, leave the global_work_size vector empty, \
25399  and set multi_dimensional to true. Setting the global work size based on the values inside \
25400  the global_work_size vector.");
25401  }
25402  if (temp_sz == 1){
25403  size[0] = params->global_work_size[0];
25404  work_dimension = 1;
25405  }
25406  else if (temp_sz == 2){
25407  size[0] = params->global_work_size[0];
25408  size[1] = params->global_work_size[1];
25409  work_dimension = 2;
25410  }
25411  else{
25412  size[0] = params->global_work_size[0];
25413  size[1] = params->global_work_size[1];
25414  size[2] = params->global_work_size[2];
25415  if (temp_sz > 3){
25416  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25417  }
25418  }
25419  }
25420 
25421  cl_event gpuExec;
25422 
25423  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25424 
25425  clWaitForEvents(1, &gpuExec);
25426 
25427  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25428 
25429  clWaitForEvents(1, &gpuExec);
25430 
25431  double *result = (double *) malloc(typesz);
25432  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25433 
25434  v->assign(result, result+sz);
25435 
25436  clReleaseCommandQueue (queue);
25437  clReleaseMemObject(buffer);
25438  clReleaseMemObject(buffer2);
25439  clReleaseMemObject(buffer3);
25440  clReleaseEvent(gpuExec);
25441  free(result);
25442  }
25443 
25444  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
25445  size_t sz = v->size();
25446  size_t sz2 = v2->size();
25447  size_t sz3 = v3.size();
25448  size_t typesz = sizeof(double) * sz;
25449  size_t typesz2 = sizeof(char) * sz2;
25450  size_t typesz3 = sizeof(int) * sz3;
25451  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25452  if (temp_sz > 0){
25453  if (temp_sz > 2){
25454  if (temp_sz > 3){
25455  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25456  }
25457  typesz = sizeof(double) * params->buffers_size[0];
25458  typesz2 = sizeof(char) * params->buffers_size[1];
25459  typesz3 = sizeof(int) * params->buffers_size[2];
25460  }
25461  else{
25462  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25463  }
25464  }
25465  cl_int error = 0;
25466  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25467  checkError(error);
25468  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25469  checkError(error);
25470  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25471  checkError(error);
25472  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25473  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25474  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25475  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25476 
25477  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25478  checkError (error);
25479  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25480  checkError (error);
25481  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25482  checkError (error);
25483 
25484  size_t size[3] = {sz, sz2, sz3};
25485  size_t work_dimension = 3;
25486 
25487  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25488  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25489  work_dimension = 1;
25490  }
25491  else if(temp_sz > 0){
25492  if (params->multi_dimensional){
25493  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25494  For default multidimensional global work size, leave the global_work_size vector empty, \
25495  and set multi_dimensional to true. Setting the global work size based on the values inside \
25496  the global_work_size vector.");
25497  }
25498  if (temp_sz == 1){
25499  size[0] = params->global_work_size[0];
25500  work_dimension = 1;
25501  }
25502  else if (temp_sz == 2){
25503  size[0] = params->global_work_size[0];
25504  size[1] = params->global_work_size[1];
25505  work_dimension = 2;
25506  }
25507  else{
25508  size[0] = params->global_work_size[0];
25509  size[1] = params->global_work_size[1];
25510  size[2] = params->global_work_size[2];
25511  if (temp_sz > 3){
25512  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25513  }
25514  }
25515  }
25516 
25517  cl_event gpuExec;
25518 
25519  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25520 
25521  clWaitForEvents(1, &gpuExec);
25522 
25523  double *result = (double *) malloc(typesz);
25524  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25525 
25526  v->assign(result, result+sz);
25527 
25528  if (typesz2 != typesz or sz != sz2){
25529  char *result2;
25530  result2 = (char *) malloc(typesz2);
25531  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25532 
25533  v2->assign(result2, result2+sz2);
25534  free(result2);
25535  }
25536  else{
25537  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25538 
25539  v2->assign(result, result+sz2);
25540  }
25541 
25542  clReleaseCommandQueue (queue);
25543  clReleaseMemObject(buffer);
25544  clReleaseMemObject(buffer2);
25545  clReleaseMemObject(buffer3);
25546  clReleaseEvent(gpuExec);
25547  free(result);
25548  }
25549 
25550  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
25551  size_t sz = v->size();
25552  size_t sz2 = v2->size();
25553  size_t sz3 = v3->size();
25554  size_t typesz = sizeof(double) * sz;
25555  size_t typesz2 = sizeof(char) * sz2;
25556  size_t typesz3 = sizeof(int) * sz3;
25557  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25558  if (temp_sz > 0){
25559  if (temp_sz > 2){
25560  if (temp_sz > 3){
25561  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25562  }
25563  typesz = sizeof(double) * params->buffers_size[0];
25564  typesz2 = sizeof(char) * params->buffers_size[1];
25565  typesz3 = sizeof(int) * params->buffers_size[2];
25566  }
25567  else{
25568  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25569  }
25570  }
25571  cl_int error = 0;
25572  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25573  checkError(error);
25574  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25575  checkError(error);
25576  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25577  checkError(error);
25578  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25579  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25580  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25581  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25582 
25583  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25584  checkError (error);
25585  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25586  checkError (error);
25587  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
25588  checkError (error);
25589 
25590  size_t size[3] = {sz, sz2, sz3};
25591  size_t work_dimension = 3;
25592 
25593  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25594  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25595  work_dimension = 1;
25596  }
25597  else if(temp_sz > 0){
25598  if (params->multi_dimensional){
25599  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25600  For default multidimensional global work size, leave the global_work_size vector empty, \
25601  and set multi_dimensional to true. Setting the global work size based on the values inside \
25602  the global_work_size vector.");
25603  }
25604  if (temp_sz == 1){
25605  size[0] = params->global_work_size[0];
25606  work_dimension = 1;
25607  }
25608  else if (temp_sz == 2){
25609  size[0] = params->global_work_size[0];
25610  size[1] = params->global_work_size[1];
25611  work_dimension = 2;
25612  }
25613  else{
25614  size[0] = params->global_work_size[0];
25615  size[1] = params->global_work_size[1];
25616  size[2] = params->global_work_size[2];
25617  if (temp_sz > 3){
25618  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25619  }
25620  }
25621  }
25622 
25623  cl_event gpuExec;
25624 
25625  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25626 
25627  clWaitForEvents(1, &gpuExec);
25628 
25629  double *result = (double *) malloc(typesz);
25630  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25631 
25632  v->assign(result, result+sz);
25633 
25634  if (typesz2 != typesz or sz != sz2){
25635  char *result2;
25636  result2 = (char *) malloc(typesz2);
25637  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25638 
25639  v2->assign(result2, result2+sz2);
25640  free(result2);
25641  }
25642  else{
25643  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25644 
25645  v2->assign(result, result+sz2);
25646  }
25647 
25648  if (typesz3 != typesz or sz != sz3){
25649  int *result3;
25650  result3 = (int *) malloc(typesz3);
25651  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
25652 
25653  v3->assign(result3, result3+sz3);
25654  free(result3);
25655  }
25656  else{
25657  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25658 
25659  v3->assign(result, result+sz3);
25660  }
25661 
25662  clReleaseCommandQueue (queue);
25663  clReleaseMemObject(buffer);
25664  clReleaseMemObject(buffer2);
25665  clReleaseMemObject(buffer3);
25666  clReleaseEvent(gpuExec);
25667  free(result);
25668  }
25669 
25670 
25671  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
25672  size_t sz = v.size();
25673  size_t sz2 = v2.size();
25674  size_t sz3 = v3.size();
25675  size_t typesz = sizeof(double) * sz;
25676  size_t typesz2 = sizeof(char) * sz2;
25677  size_t typesz3 = sizeof(float) * sz3;
25678  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25679  if (temp_sz > 0){
25680  if (temp_sz > 2){
25681  if (temp_sz > 3){
25682  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25683  }
25684  typesz = sizeof(double) * params->buffers_size[0];
25685  typesz2 = sizeof(char) * params->buffers_size[1];
25686  typesz3 = sizeof(float) * params->buffers_size[2];
25687  }
25688  else{
25689  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25690  }
25691  }
25692  cl_int error = 0;
25693  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25694  checkError(error);
25695  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25696  checkError(error);
25697  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25698  checkError(error);
25699  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25700  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25701  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25702  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25703 
25704  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
25705  checkError (error);
25706  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25707  checkError (error);
25708  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25709  checkError (error);
25710 
25711  size_t size[3] = {sz, sz2, sz3};
25712  size_t work_dimension = 3;
25713 
25714  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25715  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25716  work_dimension = 1;
25717  }
25718  else if(temp_sz > 0){
25719  if (params->multi_dimensional){
25720  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25721  For default multidimensional global work size, leave the global_work_size vector empty, \
25722  and set multi_dimensional to true. Setting the global work size based on the values inside \
25723  the global_work_size vector.");
25724  }
25725  if (temp_sz == 1){
25726  size[0] = params->global_work_size[0];
25727  work_dimension = 1;
25728  }
25729  else if (temp_sz == 2){
25730  size[0] = params->global_work_size[0];
25731  size[1] = params->global_work_size[1];
25732  work_dimension = 2;
25733  }
25734  else{
25735  size[0] = params->global_work_size[0];
25736  size[1] = params->global_work_size[1];
25737  size[2] = params->global_work_size[2];
25738  if (temp_sz > 3){
25739  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25740  }
25741  }
25742  }
25743 
25744  cl_event gpuExec;
25745 
25746  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25747 
25748  clWaitForEvents(1, &gpuExec);
25749 
25750  double *result = (double *) malloc(typesz);
25751  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25752 
25753  std::vector<double> res = std::vector<double>();
25754  res.assign(result, result+sz);
25755 
25756  clReleaseCommandQueue (queue);
25757  clReleaseMemObject(buffer);
25758  clReleaseMemObject(buffer2);
25759  clReleaseMemObject(buffer3);
25760  clReleaseEvent(gpuExec);
25761  free(result);
25762 
25763  return res;
25764  }
25765 
25766  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<char> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
25767  size_t sz = v->size();
25768  size_t sz2 = v2.size();
25769  size_t sz3 = v3.size();
25770  size_t typesz = sizeof(double) * sz;
25771  size_t typesz2 = sizeof(char) * sz2;
25772  size_t typesz3 = sizeof(float) * sz3;
25773  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25774  if (temp_sz > 0){
25775  if (temp_sz > 2){
25776  if (temp_sz > 3){
25777  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25778  }
25779  typesz = sizeof(double) * params->buffers_size[0];
25780  typesz2 = sizeof(char) * params->buffers_size[1];
25781  typesz3 = sizeof(float) * params->buffers_size[2];
25782  }
25783  else{
25784  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25785  }
25786  }
25787  cl_int error = 0;
25788  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25789  checkError(error);
25790  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25791  checkError(error);
25792  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25793  checkError(error);
25794  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25795  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25796  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25797  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25798 
25799  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25800  checkError (error);
25801  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25802  checkError (error);
25803  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25804  checkError (error);
25805 
25806  size_t size[3] = {sz, sz2, sz3};
25807  size_t work_dimension = 3;
25808 
25809  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25810  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25811  work_dimension = 1;
25812  }
25813  else if(temp_sz > 0){
25814  if (params->multi_dimensional){
25815  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25816  For default multidimensional global work size, leave the global_work_size vector empty, \
25817  and set multi_dimensional to true. Setting the global work size based on the values inside \
25818  the global_work_size vector.");
25819  }
25820  if (temp_sz == 1){
25821  size[0] = params->global_work_size[0];
25822  work_dimension = 1;
25823  }
25824  else if (temp_sz == 2){
25825  size[0] = params->global_work_size[0];
25826  size[1] = params->global_work_size[1];
25827  work_dimension = 2;
25828  }
25829  else{
25830  size[0] = params->global_work_size[0];
25831  size[1] = params->global_work_size[1];
25832  size[2] = params->global_work_size[2];
25833  if (temp_sz > 3){
25834  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25835  }
25836  }
25837  }
25838 
25839  cl_event gpuExec;
25840 
25841  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25842 
25843  clWaitForEvents(1, &gpuExec);
25844 
25845  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25846 
25847  clWaitForEvents(1, &gpuExec);
25848 
25849  double *result = (double *) malloc(typesz);
25850  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25851 
25852  v->assign(result, result+sz);
25853 
25854  clReleaseCommandQueue (queue);
25855  clReleaseMemObject(buffer);
25856  clReleaseMemObject(buffer2);
25857  clReleaseMemObject(buffer3);
25858  clReleaseEvent(gpuExec);
25859  free(result);
25860  }
25861 
25862  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
25863  size_t sz = v->size();
25864  size_t sz2 = v2->size();
25865  size_t sz3 = v3.size();
25866  size_t typesz = sizeof(double) * sz;
25867  size_t typesz2 = sizeof(char) * sz2;
25868  size_t typesz3 = sizeof(float) * sz3;
25869  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25870  if (temp_sz > 0){
25871  if (temp_sz > 2){
25872  if (temp_sz > 3){
25873  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25874  }
25875  typesz = sizeof(double) * params->buffers_size[0];
25876  typesz2 = sizeof(char) * params->buffers_size[1];
25877  typesz3 = sizeof(float) * params->buffers_size[2];
25878  }
25879  else{
25880  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25881  }
25882  }
25883  cl_int error = 0;
25884  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25885  checkError(error);
25886  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25887  checkError(error);
25888  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25889  checkError(error);
25890  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25891  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25892  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25893  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
25894 
25895  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25896  checkError (error);
25897  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25898  checkError (error);
25899  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25900  checkError (error);
25901 
25902  size_t size[3] = {sz, sz2, sz3};
25903  size_t work_dimension = 3;
25904 
25905  temp_sz = params != NULL ? params->global_work_size.size() : 0;
25906  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
25907  work_dimension = 1;
25908  }
25909  else if(temp_sz > 0){
25910  if (params->multi_dimensional){
25911  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
25912  For default multidimensional global work size, leave the global_work_size vector empty, \
25913  and set multi_dimensional to true. Setting the global work size based on the values inside \
25914  the global_work_size vector.");
25915  }
25916  if (temp_sz == 1){
25917  size[0] = params->global_work_size[0];
25918  work_dimension = 1;
25919  }
25920  else if (temp_sz == 2){
25921  size[0] = params->global_work_size[0];
25922  size[1] = params->global_work_size[1];
25923  work_dimension = 2;
25924  }
25925  else{
25926  size[0] = params->global_work_size[0];
25927  size[1] = params->global_work_size[1];
25928  size[2] = params->global_work_size[2];
25929  if (temp_sz > 3){
25930  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25931  }
25932  }
25933  }
25934 
25935  cl_event gpuExec;
25936 
25937  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25938 
25939  clWaitForEvents(1, &gpuExec);
25940 
25941  double *result = (double *) malloc(typesz);
25942  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25943 
25944  v->assign(result, result+sz);
25945 
25946  if (typesz2 != typesz or sz != sz2){
25947  char *result2;
25948  result2 = (char *) malloc(typesz2);
25949  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25950 
25951  v2->assign(result2, result2+sz2);
25952  free(result2);
25953  }
25954  else{
25955  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25956 
25957  v2->assign(result, result+sz2);
25958  }
25959 
25960  clReleaseCommandQueue (queue);
25961  clReleaseMemObject(buffer);
25962  clReleaseMemObject(buffer2);
25963  clReleaseMemObject(buffer3);
25964  clReleaseEvent(gpuExec);
25965  free(result);
25966  }
25967 
25968  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
25969  size_t sz = v->size();
25970  size_t sz2 = v2->size();
25971  size_t sz3 = v3->size();
25972  size_t typesz = sizeof(double) * sz;
25973  size_t typesz2 = sizeof(char) * sz2;
25974  size_t typesz3 = sizeof(float) * sz3;
25975  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
25976  if (temp_sz > 0){
25977  if (temp_sz > 2){
25978  if (temp_sz > 3){
25979  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25980  }
25981  typesz = sizeof(double) * params->buffers_size[0];
25982  typesz2 = sizeof(char) * params->buffers_size[1];
25983  typesz3 = sizeof(float) * params->buffers_size[2];
25984  }
25985  else{
25986  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25987  }
25988  }
25989  cl_int error = 0;
25990  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25991  checkError(error);
25992  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25993  checkError(error);
25994  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25995  checkError(error);
25996  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
25997  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
25998  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
25999  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26000 
26001  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26002  checkError (error);
26003  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26004  checkError (error);
26005  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26006  checkError (error);
26007 
26008  size_t size[3] = {sz, sz2, sz3};
26009  size_t work_dimension = 3;
26010 
26011  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26012  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26013  work_dimension = 1;
26014  }
26015  else if(temp_sz > 0){
26016  if (params->multi_dimensional){
26017  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26018  For default multidimensional global work size, leave the global_work_size vector empty, \
26019  and set multi_dimensional to true. Setting the global work size based on the values inside \
26020  the global_work_size vector.");
26021  }
26022  if (temp_sz == 1){
26023  size[0] = params->global_work_size[0];
26024  work_dimension = 1;
26025  }
26026  else if (temp_sz == 2){
26027  size[0] = params->global_work_size[0];
26028  size[1] = params->global_work_size[1];
26029  work_dimension = 2;
26030  }
26031  else{
26032  size[0] = params->global_work_size[0];
26033  size[1] = params->global_work_size[1];
26034  size[2] = params->global_work_size[2];
26035  if (temp_sz > 3){
26036  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26037  }
26038  }
26039  }
26040 
26041  cl_event gpuExec;
26042 
26043  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26044 
26045  clWaitForEvents(1, &gpuExec);
26046 
26047  double *result = (double *) malloc(typesz);
26048  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26049 
26050  v->assign(result, result+sz);
26051 
26052  if (typesz2 != typesz or sz != sz2){
26053  char *result2;
26054  result2 = (char *) malloc(typesz2);
26055  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26056 
26057  v2->assign(result2, result2+sz2);
26058  free(result2);
26059  }
26060  else{
26061  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26062 
26063  v2->assign(result, result+sz2);
26064  }
26065 
26066  if (typesz3 != typesz or sz != sz3){
26067  float *result3;
26068  result3 = (float *) malloc(typesz3);
26069  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26070 
26071  v3->assign(result3, result3+sz3);
26072  free(result3);
26073  }
26074  else{
26075  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26076 
26077  v3->assign(result, result+sz3);
26078  }
26079 
26080  clReleaseCommandQueue (queue);
26081  clReleaseMemObject(buffer);
26082  clReleaseMemObject(buffer2);
26083  clReleaseMemObject(buffer3);
26084  clReleaseEvent(gpuExec);
26085  free(result);
26086  }
26087 
26088 
26089  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
26090  size_t sz = v.size();
26091  size_t sz2 = v2.size();
26092  size_t sz3 = v3.size();
26093  size_t typesz = sizeof(double) * sz;
26094  size_t typesz2 = sizeof(char) * sz2;
26095  size_t typesz3 = sizeof(double) * sz3;
26096  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26097  if (temp_sz > 0){
26098  if (temp_sz > 2){
26099  if (temp_sz > 3){
26100  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26101  }
26102  typesz = sizeof(double) * params->buffers_size[0];
26103  typesz2 = sizeof(char) * params->buffers_size[1];
26104  typesz3 = sizeof(double) * params->buffers_size[2];
26105  }
26106  else{
26107  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26108  }
26109  }
26110  cl_int error = 0;
26111  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26112  checkError(error);
26113  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26114  checkError(error);
26115  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26116  checkError(error);
26117  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26118  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26119  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26120  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26121 
26122  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26123  checkError (error);
26124  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26125  checkError (error);
26126  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26127  checkError (error);
26128 
26129  size_t size[3] = {sz, sz2, sz3};
26130  size_t work_dimension = 3;
26131 
26132  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26133  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26134  work_dimension = 1;
26135  }
26136  else if(temp_sz > 0){
26137  if (params->multi_dimensional){
26138  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26139  For default multidimensional global work size, leave the global_work_size vector empty, \
26140  and set multi_dimensional to true. Setting the global work size based on the values inside \
26141  the global_work_size vector.");
26142  }
26143  if (temp_sz == 1){
26144  size[0] = params->global_work_size[0];
26145  work_dimension = 1;
26146  }
26147  else if (temp_sz == 2){
26148  size[0] = params->global_work_size[0];
26149  size[1] = params->global_work_size[1];
26150  work_dimension = 2;
26151  }
26152  else{
26153  size[0] = params->global_work_size[0];
26154  size[1] = params->global_work_size[1];
26155  size[2] = params->global_work_size[2];
26156  if (temp_sz > 3){
26157  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26158  }
26159  }
26160  }
26161 
26162  cl_event gpuExec;
26163 
26164  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26165 
26166  clWaitForEvents(1, &gpuExec);
26167 
26168  double *result = (double *) malloc(typesz);
26169  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26170 
26171  std::vector<double> res = std::vector<double>();
26172  res.assign(result, result+sz);
26173 
26174  clReleaseCommandQueue (queue);
26175  clReleaseMemObject(buffer);
26176  clReleaseMemObject(buffer2);
26177  clReleaseMemObject(buffer3);
26178  clReleaseEvent(gpuExec);
26179  free(result);
26180 
26181  return res;
26182  }
26183 
26184  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<char> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
26185  size_t sz = v->size();
26186  size_t sz2 = v2.size();
26187  size_t sz3 = v3.size();
26188  size_t typesz = sizeof(double) * sz;
26189  size_t typesz2 = sizeof(char) * sz2;
26190  size_t typesz3 = sizeof(double) * sz3;
26191  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26192  if (temp_sz > 0){
26193  if (temp_sz > 2){
26194  if (temp_sz > 3){
26195  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26196  }
26197  typesz = sizeof(double) * params->buffers_size[0];
26198  typesz2 = sizeof(char) * params->buffers_size[1];
26199  typesz3 = sizeof(double) * params->buffers_size[2];
26200  }
26201  else{
26202  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26203  }
26204  }
26205  cl_int error = 0;
26206  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26207  checkError(error);
26208  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26209  checkError(error);
26210  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26211  checkError(error);
26212  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26213  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26214  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26215  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26216 
26217  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26218  checkError (error);
26219  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26220  checkError (error);
26221  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26222  checkError (error);
26223 
26224  size_t size[3] = {sz, sz2, sz3};
26225  size_t work_dimension = 3;
26226 
26227  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26228  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26229  work_dimension = 1;
26230  }
26231  else if(temp_sz > 0){
26232  if (params->multi_dimensional){
26233  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26234  For default multidimensional global work size, leave the global_work_size vector empty, \
26235  and set multi_dimensional to true. Setting the global work size based on the values inside \
26236  the global_work_size vector.");
26237  }
26238  if (temp_sz == 1){
26239  size[0] = params->global_work_size[0];
26240  work_dimension = 1;
26241  }
26242  else if (temp_sz == 2){
26243  size[0] = params->global_work_size[0];
26244  size[1] = params->global_work_size[1];
26245  work_dimension = 2;
26246  }
26247  else{
26248  size[0] = params->global_work_size[0];
26249  size[1] = params->global_work_size[1];
26250  size[2] = params->global_work_size[2];
26251  if (temp_sz > 3){
26252  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26253  }
26254  }
26255  }
26256 
26257  cl_event gpuExec;
26258 
26259  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26260 
26261  clWaitForEvents(1, &gpuExec);
26262 
26263  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26264 
26265  clWaitForEvents(1, &gpuExec);
26266 
26267  double *result = (double *) malloc(typesz);
26268  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26269 
26270  v->assign(result, result+sz);
26271 
26272  clReleaseCommandQueue (queue);
26273  clReleaseMemObject(buffer);
26274  clReleaseMemObject(buffer2);
26275  clReleaseMemObject(buffer3);
26276  clReleaseEvent(gpuExec);
26277  free(result);
26278  }
26279 
26280  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
26281  size_t sz = v->size();
26282  size_t sz2 = v2->size();
26283  size_t sz3 = v3.size();
26284  size_t typesz = sizeof(double) * sz;
26285  size_t typesz2 = sizeof(char) * sz2;
26286  size_t typesz3 = sizeof(double) * sz3;
26287  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26288  if (temp_sz > 0){
26289  if (temp_sz > 2){
26290  if (temp_sz > 3){
26291  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26292  }
26293  typesz = sizeof(double) * params->buffers_size[0];
26294  typesz2 = sizeof(char) * params->buffers_size[1];
26295  typesz3 = sizeof(double) * params->buffers_size[2];
26296  }
26297  else{
26298  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26299  }
26300  }
26301  cl_int error = 0;
26302  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26303  checkError(error);
26304  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26305  checkError(error);
26306  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26307  checkError(error);
26308  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26309  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26310  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26311  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26312 
26313  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26314  checkError (error);
26315  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26316  checkError (error);
26317  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26318  checkError (error);
26319 
26320  size_t size[3] = {sz, sz2, sz3};
26321  size_t work_dimension = 3;
26322 
26323  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26324  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26325  work_dimension = 1;
26326  }
26327  else if(temp_sz > 0){
26328  if (params->multi_dimensional){
26329  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26330  For default multidimensional global work size, leave the global_work_size vector empty, \
26331  and set multi_dimensional to true. Setting the global work size based on the values inside \
26332  the global_work_size vector.");
26333  }
26334  if (temp_sz == 1){
26335  size[0] = params->global_work_size[0];
26336  work_dimension = 1;
26337  }
26338  else if (temp_sz == 2){
26339  size[0] = params->global_work_size[0];
26340  size[1] = params->global_work_size[1];
26341  work_dimension = 2;
26342  }
26343  else{
26344  size[0] = params->global_work_size[0];
26345  size[1] = params->global_work_size[1];
26346  size[2] = params->global_work_size[2];
26347  if (temp_sz > 3){
26348  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26349  }
26350  }
26351  }
26352 
26353  cl_event gpuExec;
26354 
26355  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26356 
26357  clWaitForEvents(1, &gpuExec);
26358 
26359  double *result = (double *) malloc(typesz);
26360  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26361 
26362  v->assign(result, result+sz);
26363 
26364  if (typesz2 != typesz or sz != sz2){
26365  char *result2;
26366  result2 = (char *) malloc(typesz2);
26367  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26368 
26369  v2->assign(result2, result2+sz2);
26370  free(result2);
26371  }
26372  else{
26373  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26374 
26375  v2->assign(result, result+sz2);
26376  }
26377 
26378  clReleaseCommandQueue (queue);
26379  clReleaseMemObject(buffer);
26380  clReleaseMemObject(buffer2);
26381  clReleaseMemObject(buffer3);
26382  clReleaseEvent(gpuExec);
26383  free(result);
26384  }
26385 
26386  void ROS_OpenCL::process(std::vector<double>* v, std::vector<char>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
26387  size_t sz = v->size();
26388  size_t sz2 = v2->size();
26389  size_t sz3 = v3->size();
26390  size_t typesz = sizeof(double) * sz;
26391  size_t typesz2 = sizeof(char) * sz2;
26392  size_t typesz3 = sizeof(double) * sz3;
26393  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26394  if (temp_sz > 0){
26395  if (temp_sz > 2){
26396  if (temp_sz > 3){
26397  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26398  }
26399  typesz = sizeof(double) * params->buffers_size[0];
26400  typesz2 = sizeof(char) * params->buffers_size[1];
26401  typesz3 = sizeof(double) * params->buffers_size[2];
26402  }
26403  else{
26404  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26405  }
26406  }
26407  cl_int error = 0;
26408  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26409  checkError(error);
26410  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26411  checkError(error);
26412  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
26413  checkError(error);
26414  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26415  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26416  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26417  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26418 
26419  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26420  checkError (error);
26421  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26422  checkError (error);
26423  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26424  checkError (error);
26425 
26426  size_t size[3] = {sz, sz2, sz3};
26427  size_t work_dimension = 3;
26428 
26429  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26430  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26431  work_dimension = 1;
26432  }
26433  else if(temp_sz > 0){
26434  if (params->multi_dimensional){
26435  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26436  For default multidimensional global work size, leave the global_work_size vector empty, \
26437  and set multi_dimensional to true. Setting the global work size based on the values inside \
26438  the global_work_size vector.");
26439  }
26440  if (temp_sz == 1){
26441  size[0] = params->global_work_size[0];
26442  work_dimension = 1;
26443  }
26444  else if (temp_sz == 2){
26445  size[0] = params->global_work_size[0];
26446  size[1] = params->global_work_size[1];
26447  work_dimension = 2;
26448  }
26449  else{
26450  size[0] = params->global_work_size[0];
26451  size[1] = params->global_work_size[1];
26452  size[2] = params->global_work_size[2];
26453  if (temp_sz > 3){
26454  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26455  }
26456  }
26457  }
26458 
26459  cl_event gpuExec;
26460 
26461  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26462 
26463  clWaitForEvents(1, &gpuExec);
26464 
26465  double *result = (double *) malloc(typesz);
26466  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26467 
26468  v->assign(result, result+sz);
26469 
26470  if (typesz2 != typesz or sz != sz2){
26471  char *result2;
26472  result2 = (char *) malloc(typesz2);
26473  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26474 
26475  v2->assign(result2, result2+sz2);
26476  free(result2);
26477  }
26478  else{
26479  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26480 
26481  v2->assign(result, result+sz2);
26482  }
26483 
26484  if (typesz3 != typesz or sz != sz3){
26485  double *result3;
26486  result3 = (double *) malloc(typesz3);
26487  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26488 
26489  v3->assign(result3, result3+sz3);
26490  free(result3);
26491  }
26492  else{
26493  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26494 
26495  v3->assign(result, result+sz3);
26496  }
26497 
26498  clReleaseCommandQueue (queue);
26499  clReleaseMemObject(buffer);
26500  clReleaseMemObject(buffer2);
26501  clReleaseMemObject(buffer3);
26502  clReleaseEvent(gpuExec);
26503  free(result);
26504  }
26505 
26506 
26507  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
26508  size_t sz = v.size();
26509  size_t sz2 = v2.size();
26510  size_t sz3 = v3.size();
26511  size_t typesz = sizeof(double) * sz;
26512  size_t typesz2 = sizeof(int) * sz2;
26513  size_t typesz3 = sizeof(char) * sz3;
26514  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26515  if (temp_sz > 0){
26516  if (temp_sz > 2){
26517  if (temp_sz > 3){
26518  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26519  }
26520  typesz = sizeof(double) * params->buffers_size[0];
26521  typesz2 = sizeof(int) * params->buffers_size[1];
26522  typesz3 = sizeof(char) * params->buffers_size[2];
26523  }
26524  else{
26525  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26526  }
26527  }
26528  cl_int error = 0;
26529  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26530  checkError(error);
26531  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26532  checkError(error);
26533  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26534  checkError(error);
26535  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26536  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26537  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26538  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26539 
26540  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26541  checkError (error);
26542  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26543  checkError (error);
26544  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26545  checkError (error);
26546 
26547  size_t size[3] = {sz, sz2, sz3};
26548  size_t work_dimension = 3;
26549 
26550  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26551  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26552  work_dimension = 1;
26553  }
26554  else if(temp_sz > 0){
26555  if (params->multi_dimensional){
26556  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26557  For default multidimensional global work size, leave the global_work_size vector empty, \
26558  and set multi_dimensional to true. Setting the global work size based on the values inside \
26559  the global_work_size vector.");
26560  }
26561  if (temp_sz == 1){
26562  size[0] = params->global_work_size[0];
26563  work_dimension = 1;
26564  }
26565  else if (temp_sz == 2){
26566  size[0] = params->global_work_size[0];
26567  size[1] = params->global_work_size[1];
26568  work_dimension = 2;
26569  }
26570  else{
26571  size[0] = params->global_work_size[0];
26572  size[1] = params->global_work_size[1];
26573  size[2] = params->global_work_size[2];
26574  if (temp_sz > 3){
26575  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26576  }
26577  }
26578  }
26579 
26580  cl_event gpuExec;
26581 
26582  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26583 
26584  clWaitForEvents(1, &gpuExec);
26585 
26586  double *result = (double *) malloc(typesz);
26587  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26588 
26589  std::vector<double> res = std::vector<double>();
26590  res.assign(result, result+sz);
26591 
26592  clReleaseCommandQueue (queue);
26593  clReleaseMemObject(buffer);
26594  clReleaseMemObject(buffer2);
26595  clReleaseMemObject(buffer3);
26596  clReleaseEvent(gpuExec);
26597  free(result);
26598 
26599  return res;
26600  }
26601 
26602  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<int> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
26603  size_t sz = v->size();
26604  size_t sz2 = v2.size();
26605  size_t sz3 = v3.size();
26606  size_t typesz = sizeof(double) * sz;
26607  size_t typesz2 = sizeof(int) * sz2;
26608  size_t typesz3 = sizeof(char) * sz3;
26609  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26610  if (temp_sz > 0){
26611  if (temp_sz > 2){
26612  if (temp_sz > 3){
26613  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26614  }
26615  typesz = sizeof(double) * params->buffers_size[0];
26616  typesz2 = sizeof(int) * params->buffers_size[1];
26617  typesz3 = sizeof(char) * params->buffers_size[2];
26618  }
26619  else{
26620  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26621  }
26622  }
26623  cl_int error = 0;
26624  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26625  checkError(error);
26626  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26627  checkError(error);
26628  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26629  checkError(error);
26630  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26631  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26632  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26633  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26634 
26635  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26636  checkError (error);
26637  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26638  checkError (error);
26639  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26640  checkError (error);
26641 
26642  size_t size[3] = {sz, sz2, sz3};
26643  size_t work_dimension = 3;
26644 
26645  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26646  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26647  work_dimension = 1;
26648  }
26649  else if(temp_sz > 0){
26650  if (params->multi_dimensional){
26651  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26652  For default multidimensional global work size, leave the global_work_size vector empty, \
26653  and set multi_dimensional to true. Setting the global work size based on the values inside \
26654  the global_work_size vector.");
26655  }
26656  if (temp_sz == 1){
26657  size[0] = params->global_work_size[0];
26658  work_dimension = 1;
26659  }
26660  else if (temp_sz == 2){
26661  size[0] = params->global_work_size[0];
26662  size[1] = params->global_work_size[1];
26663  work_dimension = 2;
26664  }
26665  else{
26666  size[0] = params->global_work_size[0];
26667  size[1] = params->global_work_size[1];
26668  size[2] = params->global_work_size[2];
26669  if (temp_sz > 3){
26670  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26671  }
26672  }
26673  }
26674 
26675  cl_event gpuExec;
26676 
26677  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26678 
26679  clWaitForEvents(1, &gpuExec);
26680 
26681  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26682 
26683  clWaitForEvents(1, &gpuExec);
26684 
26685  double *result = (double *) malloc(typesz);
26686  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26687 
26688  v->assign(result, result+sz);
26689 
26690  clReleaseCommandQueue (queue);
26691  clReleaseMemObject(buffer);
26692  clReleaseMemObject(buffer2);
26693  clReleaseMemObject(buffer3);
26694  clReleaseEvent(gpuExec);
26695  free(result);
26696  }
26697 
26698  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
26699  size_t sz = v->size();
26700  size_t sz2 = v2->size();
26701  size_t sz3 = v3.size();
26702  size_t typesz = sizeof(double) * sz;
26703  size_t typesz2 = sizeof(int) * sz2;
26704  size_t typesz3 = sizeof(char) * sz3;
26705  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26706  if (temp_sz > 0){
26707  if (temp_sz > 2){
26708  if (temp_sz > 3){
26709  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26710  }
26711  typesz = sizeof(double) * params->buffers_size[0];
26712  typesz2 = sizeof(int) * params->buffers_size[1];
26713  typesz3 = sizeof(char) * params->buffers_size[2];
26714  }
26715  else{
26716  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26717  }
26718  }
26719  cl_int error = 0;
26720  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26721  checkError(error);
26722  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26723  checkError(error);
26724  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26725  checkError(error);
26726  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26727  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26728  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26729  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26730 
26731  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26732  checkError (error);
26733  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26734  checkError (error);
26735  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26736  checkError (error);
26737 
26738  size_t size[3] = {sz, sz2, sz3};
26739  size_t work_dimension = 3;
26740 
26741  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26742  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26743  work_dimension = 1;
26744  }
26745  else if(temp_sz > 0){
26746  if (params->multi_dimensional){
26747  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26748  For default multidimensional global work size, leave the global_work_size vector empty, \
26749  and set multi_dimensional to true. Setting the global work size based on the values inside \
26750  the global_work_size vector.");
26751  }
26752  if (temp_sz == 1){
26753  size[0] = params->global_work_size[0];
26754  work_dimension = 1;
26755  }
26756  else if (temp_sz == 2){
26757  size[0] = params->global_work_size[0];
26758  size[1] = params->global_work_size[1];
26759  work_dimension = 2;
26760  }
26761  else{
26762  size[0] = params->global_work_size[0];
26763  size[1] = params->global_work_size[1];
26764  size[2] = params->global_work_size[2];
26765  if (temp_sz > 3){
26766  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26767  }
26768  }
26769  }
26770 
26771  cl_event gpuExec;
26772 
26773  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26774 
26775  clWaitForEvents(1, &gpuExec);
26776 
26777  double *result = (double *) malloc(typesz);
26778  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26779 
26780  v->assign(result, result+sz);
26781 
26782  if (typesz2 != typesz or sz != sz2){
26783  int *result2;
26784  result2 = (int *) malloc(typesz2);
26785  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26786 
26787  v2->assign(result2, result2+sz2);
26788  free(result2);
26789  }
26790  else{
26791  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26792 
26793  v2->assign(result, result+sz2);
26794  }
26795 
26796  clReleaseCommandQueue (queue);
26797  clReleaseMemObject(buffer);
26798  clReleaseMemObject(buffer2);
26799  clReleaseMemObject(buffer3);
26800  clReleaseEvent(gpuExec);
26801  free(result);
26802  }
26803 
26804  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
26805  size_t sz = v->size();
26806  size_t sz2 = v2->size();
26807  size_t sz3 = v3->size();
26808  size_t typesz = sizeof(double) * sz;
26809  size_t typesz2 = sizeof(int) * sz2;
26810  size_t typesz3 = sizeof(char) * sz3;
26811  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26812  if (temp_sz > 0){
26813  if (temp_sz > 2){
26814  if (temp_sz > 3){
26815  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26816  }
26817  typesz = sizeof(double) * params->buffers_size[0];
26818  typesz2 = sizeof(int) * params->buffers_size[1];
26819  typesz3 = sizeof(char) * params->buffers_size[2];
26820  }
26821  else{
26822  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26823  }
26824  }
26825  cl_int error = 0;
26826  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26827  checkError(error);
26828  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26829  checkError(error);
26830  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
26831  checkError(error);
26832  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26833  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26834  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26835  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26836 
26837  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26838  checkError (error);
26839  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26840  checkError (error);
26841  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26842  checkError (error);
26843 
26844  size_t size[3] = {sz, sz2, sz3};
26845  size_t work_dimension = 3;
26846 
26847  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26848  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26849  work_dimension = 1;
26850  }
26851  else if(temp_sz > 0){
26852  if (params->multi_dimensional){
26853  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26854  For default multidimensional global work size, leave the global_work_size vector empty, \
26855  and set multi_dimensional to true. Setting the global work size based on the values inside \
26856  the global_work_size vector.");
26857  }
26858  if (temp_sz == 1){
26859  size[0] = params->global_work_size[0];
26860  work_dimension = 1;
26861  }
26862  else if (temp_sz == 2){
26863  size[0] = params->global_work_size[0];
26864  size[1] = params->global_work_size[1];
26865  work_dimension = 2;
26866  }
26867  else{
26868  size[0] = params->global_work_size[0];
26869  size[1] = params->global_work_size[1];
26870  size[2] = params->global_work_size[2];
26871  if (temp_sz > 3){
26872  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26873  }
26874  }
26875  }
26876 
26877  cl_event gpuExec;
26878 
26879  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26880 
26881  clWaitForEvents(1, &gpuExec);
26882 
26883  double *result = (double *) malloc(typesz);
26884  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26885 
26886  v->assign(result, result+sz);
26887 
26888  if (typesz2 != typesz or sz != sz2){
26889  int *result2;
26890  result2 = (int *) malloc(typesz2);
26891  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26892 
26893  v2->assign(result2, result2+sz2);
26894  free(result2);
26895  }
26896  else{
26897  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26898 
26899  v2->assign(result, result+sz2);
26900  }
26901 
26902  if (typesz3 != typesz or sz != sz3){
26903  char *result3;
26904  result3 = (char *) malloc(typesz3);
26905  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26906 
26907  v3->assign(result3, result3+sz3);
26908  free(result3);
26909  }
26910  else{
26911  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26912 
26913  v3->assign(result, result+sz3);
26914  }
26915 
26916  clReleaseCommandQueue (queue);
26917  clReleaseMemObject(buffer);
26918  clReleaseMemObject(buffer2);
26919  clReleaseMemObject(buffer3);
26920  clReleaseEvent(gpuExec);
26921  free(result);
26922  }
26923 
26924 
26925  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
26926  size_t sz = v.size();
26927  size_t sz2 = v2.size();
26928  size_t sz3 = v3.size();
26929  size_t typesz = sizeof(double) * sz;
26930  size_t typesz2 = sizeof(int) * sz2;
26931  size_t typesz3 = sizeof(int) * sz3;
26932  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
26933  if (temp_sz > 0){
26934  if (temp_sz > 2){
26935  if (temp_sz > 3){
26936  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26937  }
26938  typesz = sizeof(double) * params->buffers_size[0];
26939  typesz2 = sizeof(int) * params->buffers_size[1];
26940  typesz3 = sizeof(int) * params->buffers_size[2];
26941  }
26942  else{
26943  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26944  }
26945  }
26946  cl_int error = 0;
26947  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26948  checkError(error);
26949  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26950  checkError(error);
26951  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26952  checkError(error);
26953  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
26954  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
26955  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
26956  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
26957 
26958  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26959  checkError (error);
26960  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26961  checkError (error);
26962  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26963  checkError (error);
26964 
26965  size_t size[3] = {sz, sz2, sz3};
26966  size_t work_dimension = 3;
26967 
26968  temp_sz = params != NULL ? params->global_work_size.size() : 0;
26969  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
26970  work_dimension = 1;
26971  }
26972  else if(temp_sz > 0){
26973  if (params->multi_dimensional){
26974  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
26975  For default multidimensional global work size, leave the global_work_size vector empty, \
26976  and set multi_dimensional to true. Setting the global work size based on the values inside \
26977  the global_work_size vector.");
26978  }
26979  if (temp_sz == 1){
26980  size[0] = params->global_work_size[0];
26981  work_dimension = 1;
26982  }
26983  else if (temp_sz == 2){
26984  size[0] = params->global_work_size[0];
26985  size[1] = params->global_work_size[1];
26986  work_dimension = 2;
26987  }
26988  else{
26989  size[0] = params->global_work_size[0];
26990  size[1] = params->global_work_size[1];
26991  size[2] = params->global_work_size[2];
26992  if (temp_sz > 3){
26993  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26994  }
26995  }
26996  }
26997 
26998  cl_event gpuExec;
26999 
27000  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27001 
27002  clWaitForEvents(1, &gpuExec);
27003 
27004  double *result = (double *) malloc(typesz);
27005  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27006 
27007  std::vector<double> res = std::vector<double>();
27008  res.assign(result, result+sz);
27009 
27010  clReleaseCommandQueue (queue);
27011  clReleaseMemObject(buffer);
27012  clReleaseMemObject(buffer2);
27013  clReleaseMemObject(buffer3);
27014  clReleaseEvent(gpuExec);
27015  free(result);
27016 
27017  return res;
27018  }
27019 
27020  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<int> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
27021  size_t sz = v->size();
27022  size_t sz2 = v2.size();
27023  size_t sz3 = v3.size();
27024  size_t typesz = sizeof(double) * sz;
27025  size_t typesz2 = sizeof(int) * sz2;
27026  size_t typesz3 = sizeof(int) * sz3;
27027  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27028  if (temp_sz > 0){
27029  if (temp_sz > 2){
27030  if (temp_sz > 3){
27031  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27032  }
27033  typesz = sizeof(double) * params->buffers_size[0];
27034  typesz2 = sizeof(int) * params->buffers_size[1];
27035  typesz3 = sizeof(int) * params->buffers_size[2];
27036  }
27037  else{
27038  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27039  }
27040  }
27041  cl_int error = 0;
27042  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27043  checkError(error);
27044  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27045  checkError(error);
27046  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27047  checkError(error);
27048  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27049  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27050  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27051  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27052 
27053  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27054  checkError (error);
27055  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27056  checkError (error);
27057  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27058  checkError (error);
27059 
27060  size_t size[3] = {sz, sz2, sz3};
27061  size_t work_dimension = 3;
27062 
27063  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27064  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27065  work_dimension = 1;
27066  }
27067  else if(temp_sz > 0){
27068  if (params->multi_dimensional){
27069  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27070  For default multidimensional global work size, leave the global_work_size vector empty, \
27071  and set multi_dimensional to true. Setting the global work size based on the values inside \
27072  the global_work_size vector.");
27073  }
27074  if (temp_sz == 1){
27075  size[0] = params->global_work_size[0];
27076  work_dimension = 1;
27077  }
27078  else if (temp_sz == 2){
27079  size[0] = params->global_work_size[0];
27080  size[1] = params->global_work_size[1];
27081  work_dimension = 2;
27082  }
27083  else{
27084  size[0] = params->global_work_size[0];
27085  size[1] = params->global_work_size[1];
27086  size[2] = params->global_work_size[2];
27087  if (temp_sz > 3){
27088  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27089  }
27090  }
27091  }
27092 
27093  cl_event gpuExec;
27094 
27095  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27096 
27097  clWaitForEvents(1, &gpuExec);
27098 
27099  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27100 
27101  clWaitForEvents(1, &gpuExec);
27102 
27103  double *result = (double *) malloc(typesz);
27104  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27105 
27106  v->assign(result, result+sz);
27107 
27108  clReleaseCommandQueue (queue);
27109  clReleaseMemObject(buffer);
27110  clReleaseMemObject(buffer2);
27111  clReleaseMemObject(buffer3);
27112  clReleaseEvent(gpuExec);
27113  free(result);
27114  }
27115 
27116  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
27117  size_t sz = v->size();
27118  size_t sz2 = v2->size();
27119  size_t sz3 = v3.size();
27120  size_t typesz = sizeof(double) * sz;
27121  size_t typesz2 = sizeof(int) * sz2;
27122  size_t typesz3 = sizeof(int) * sz3;
27123  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27124  if (temp_sz > 0){
27125  if (temp_sz > 2){
27126  if (temp_sz > 3){
27127  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27128  }
27129  typesz = sizeof(double) * params->buffers_size[0];
27130  typesz2 = sizeof(int) * params->buffers_size[1];
27131  typesz3 = sizeof(int) * params->buffers_size[2];
27132  }
27133  else{
27134  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27135  }
27136  }
27137  cl_int error = 0;
27138  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27139  checkError(error);
27140  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27141  checkError(error);
27142  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27143  checkError(error);
27144  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27145  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27146  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27147  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27148 
27149  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27150  checkError (error);
27151  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27152  checkError (error);
27153  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27154  checkError (error);
27155 
27156  size_t size[3] = {sz, sz2, sz3};
27157  size_t work_dimension = 3;
27158 
27159  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27160  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27161  work_dimension = 1;
27162  }
27163  else if(temp_sz > 0){
27164  if (params->multi_dimensional){
27165  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27166  For default multidimensional global work size, leave the global_work_size vector empty, \
27167  and set multi_dimensional to true. Setting the global work size based on the values inside \
27168  the global_work_size vector.");
27169  }
27170  if (temp_sz == 1){
27171  size[0] = params->global_work_size[0];
27172  work_dimension = 1;
27173  }
27174  else if (temp_sz == 2){
27175  size[0] = params->global_work_size[0];
27176  size[1] = params->global_work_size[1];
27177  work_dimension = 2;
27178  }
27179  else{
27180  size[0] = params->global_work_size[0];
27181  size[1] = params->global_work_size[1];
27182  size[2] = params->global_work_size[2];
27183  if (temp_sz > 3){
27184  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27185  }
27186  }
27187  }
27188 
27189  cl_event gpuExec;
27190 
27191  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27192 
27193  clWaitForEvents(1, &gpuExec);
27194 
27195  double *result = (double *) malloc(typesz);
27196  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27197 
27198  v->assign(result, result+sz);
27199 
27200  if (typesz2 != typesz or sz != sz2){
27201  int *result2;
27202  result2 = (int *) malloc(typesz2);
27203  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27204 
27205  v2->assign(result2, result2+sz2);
27206  free(result2);
27207  }
27208  else{
27209  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27210 
27211  v2->assign(result, result+sz2);
27212  }
27213 
27214  clReleaseCommandQueue (queue);
27215  clReleaseMemObject(buffer);
27216  clReleaseMemObject(buffer2);
27217  clReleaseMemObject(buffer3);
27218  clReleaseEvent(gpuExec);
27219  free(result);
27220  }
27221 
27222  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
27223  size_t sz = v->size();
27224  size_t sz2 = v2->size();
27225  size_t sz3 = v3->size();
27226  size_t typesz = sizeof(double) * sz;
27227  size_t typesz2 = sizeof(int) * sz2;
27228  size_t typesz3 = sizeof(int) * sz3;
27229  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27230  if (temp_sz > 0){
27231  if (temp_sz > 2){
27232  if (temp_sz > 3){
27233  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27234  }
27235  typesz = sizeof(double) * params->buffers_size[0];
27236  typesz2 = sizeof(int) * params->buffers_size[1];
27237  typesz3 = sizeof(int) * params->buffers_size[2];
27238  }
27239  else{
27240  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27241  }
27242  }
27243  cl_int error = 0;
27244  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27245  checkError(error);
27246  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27247  checkError(error);
27248  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
27249  checkError(error);
27250  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27251  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27252  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27253  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27254 
27255  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27256  checkError (error);
27257  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27258  checkError (error);
27259  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
27260  checkError (error);
27261 
27262  size_t size[3] = {sz, sz2, sz3};
27263  size_t work_dimension = 3;
27264 
27265  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27266  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27267  work_dimension = 1;
27268  }
27269  else if(temp_sz > 0){
27270  if (params->multi_dimensional){
27271  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27272  For default multidimensional global work size, leave the global_work_size vector empty, \
27273  and set multi_dimensional to true. Setting the global work size based on the values inside \
27274  the global_work_size vector.");
27275  }
27276  if (temp_sz == 1){
27277  size[0] = params->global_work_size[0];
27278  work_dimension = 1;
27279  }
27280  else if (temp_sz == 2){
27281  size[0] = params->global_work_size[0];
27282  size[1] = params->global_work_size[1];
27283  work_dimension = 2;
27284  }
27285  else{
27286  size[0] = params->global_work_size[0];
27287  size[1] = params->global_work_size[1];
27288  size[2] = params->global_work_size[2];
27289  if (temp_sz > 3){
27290  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27291  }
27292  }
27293  }
27294 
27295  cl_event gpuExec;
27296 
27297  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27298 
27299  clWaitForEvents(1, &gpuExec);
27300 
27301  double *result = (double *) malloc(typesz);
27302  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27303 
27304  v->assign(result, result+sz);
27305 
27306  if (typesz2 != typesz or sz != sz2){
27307  int *result2;
27308  result2 = (int *) malloc(typesz2);
27309  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27310 
27311  v2->assign(result2, result2+sz2);
27312  free(result2);
27313  }
27314  else{
27315  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27316 
27317  v2->assign(result, result+sz2);
27318  }
27319 
27320  if (typesz3 != typesz or sz != sz3){
27321  int *result3;
27322  result3 = (int *) malloc(typesz3);
27323  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
27324 
27325  v3->assign(result3, result3+sz3);
27326  free(result3);
27327  }
27328  else{
27329  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27330 
27331  v3->assign(result, result+sz3);
27332  }
27333 
27334  clReleaseCommandQueue (queue);
27335  clReleaseMemObject(buffer);
27336  clReleaseMemObject(buffer2);
27337  clReleaseMemObject(buffer3);
27338  clReleaseEvent(gpuExec);
27339  free(result);
27340  }
27341 
27342 
27343  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
27344  size_t sz = v.size();
27345  size_t sz2 = v2.size();
27346  size_t sz3 = v3.size();
27347  size_t typesz = sizeof(double) * sz;
27348  size_t typesz2 = sizeof(int) * sz2;
27349  size_t typesz3 = sizeof(float) * sz3;
27350  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27351  if (temp_sz > 0){
27352  if (temp_sz > 2){
27353  if (temp_sz > 3){
27354  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27355  }
27356  typesz = sizeof(double) * params->buffers_size[0];
27357  typesz2 = sizeof(int) * params->buffers_size[1];
27358  typesz3 = sizeof(float) * params->buffers_size[2];
27359  }
27360  else{
27361  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27362  }
27363  }
27364  cl_int error = 0;
27365  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27366  checkError(error);
27367  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27368  checkError(error);
27369  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27370  checkError(error);
27371  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27372  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27373  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27374  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27375 
27376  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
27377  checkError (error);
27378  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27379  checkError (error);
27380  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27381  checkError (error);
27382 
27383  size_t size[3] = {sz, sz2, sz3};
27384  size_t work_dimension = 3;
27385 
27386  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27387  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27388  work_dimension = 1;
27389  }
27390  else if(temp_sz > 0){
27391  if (params->multi_dimensional){
27392  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27393  For default multidimensional global work size, leave the global_work_size vector empty, \
27394  and set multi_dimensional to true. Setting the global work size based on the values inside \
27395  the global_work_size vector.");
27396  }
27397  if (temp_sz == 1){
27398  size[0] = params->global_work_size[0];
27399  work_dimension = 1;
27400  }
27401  else if (temp_sz == 2){
27402  size[0] = params->global_work_size[0];
27403  size[1] = params->global_work_size[1];
27404  work_dimension = 2;
27405  }
27406  else{
27407  size[0] = params->global_work_size[0];
27408  size[1] = params->global_work_size[1];
27409  size[2] = params->global_work_size[2];
27410  if (temp_sz > 3){
27411  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27412  }
27413  }
27414  }
27415 
27416  cl_event gpuExec;
27417 
27418  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27419 
27420  clWaitForEvents(1, &gpuExec);
27421 
27422  double *result = (double *) malloc(typesz);
27423  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27424 
27425  std::vector<double> res = std::vector<double>();
27426  res.assign(result, result+sz);
27427 
27428  clReleaseCommandQueue (queue);
27429  clReleaseMemObject(buffer);
27430  clReleaseMemObject(buffer2);
27431  clReleaseMemObject(buffer3);
27432  clReleaseEvent(gpuExec);
27433  free(result);
27434 
27435  return res;
27436  }
27437 
27438  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<int> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
27439  size_t sz = v->size();
27440  size_t sz2 = v2.size();
27441  size_t sz3 = v3.size();
27442  size_t typesz = sizeof(double) * sz;
27443  size_t typesz2 = sizeof(int) * sz2;
27444  size_t typesz3 = sizeof(float) * sz3;
27445  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27446  if (temp_sz > 0){
27447  if (temp_sz > 2){
27448  if (temp_sz > 3){
27449  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27450  }
27451  typesz = sizeof(double) * params->buffers_size[0];
27452  typesz2 = sizeof(int) * params->buffers_size[1];
27453  typesz3 = sizeof(float) * params->buffers_size[2];
27454  }
27455  else{
27456  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27457  }
27458  }
27459  cl_int error = 0;
27460  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27461  checkError(error);
27462  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27463  checkError(error);
27464  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27465  checkError(error);
27466  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27467  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27468  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27469  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27470 
27471  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27472  checkError (error);
27473  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27474  checkError (error);
27475  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27476  checkError (error);
27477 
27478  size_t size[3] = {sz, sz2, sz3};
27479  size_t work_dimension = 3;
27480 
27481  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27482  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27483  work_dimension = 1;
27484  }
27485  else if(temp_sz > 0){
27486  if (params->multi_dimensional){
27487  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27488  For default multidimensional global work size, leave the global_work_size vector empty, \
27489  and set multi_dimensional to true. Setting the global work size based on the values inside \
27490  the global_work_size vector.");
27491  }
27492  if (temp_sz == 1){
27493  size[0] = params->global_work_size[0];
27494  work_dimension = 1;
27495  }
27496  else if (temp_sz == 2){
27497  size[0] = params->global_work_size[0];
27498  size[1] = params->global_work_size[1];
27499  work_dimension = 2;
27500  }
27501  else{
27502  size[0] = params->global_work_size[0];
27503  size[1] = params->global_work_size[1];
27504  size[2] = params->global_work_size[2];
27505  if (temp_sz > 3){
27506  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27507  }
27508  }
27509  }
27510 
27511  cl_event gpuExec;
27512 
27513  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27514 
27515  clWaitForEvents(1, &gpuExec);
27516 
27517  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27518 
27519  clWaitForEvents(1, &gpuExec);
27520 
27521  double *result = (double *) malloc(typesz);
27522  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27523 
27524  v->assign(result, result+sz);
27525 
27526  clReleaseCommandQueue (queue);
27527  clReleaseMemObject(buffer);
27528  clReleaseMemObject(buffer2);
27529  clReleaseMemObject(buffer3);
27530  clReleaseEvent(gpuExec);
27531  free(result);
27532  }
27533 
27534  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
27535  size_t sz = v->size();
27536  size_t sz2 = v2->size();
27537  size_t sz3 = v3.size();
27538  size_t typesz = sizeof(double) * sz;
27539  size_t typesz2 = sizeof(int) * sz2;
27540  size_t typesz3 = sizeof(float) * sz3;
27541  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27542  if (temp_sz > 0){
27543  if (temp_sz > 2){
27544  if (temp_sz > 3){
27545  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27546  }
27547  typesz = sizeof(double) * params->buffers_size[0];
27548  typesz2 = sizeof(int) * params->buffers_size[1];
27549  typesz3 = sizeof(float) * params->buffers_size[2];
27550  }
27551  else{
27552  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27553  }
27554  }
27555  cl_int error = 0;
27556  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27557  checkError(error);
27558  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27559  checkError(error);
27560  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27561  checkError(error);
27562  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27563  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27564  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27565  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27566 
27567  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27568  checkError (error);
27569  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27570  checkError (error);
27571  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27572  checkError (error);
27573 
27574  size_t size[3] = {sz, sz2, sz3};
27575  size_t work_dimension = 3;
27576 
27577  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27578  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27579  work_dimension = 1;
27580  }
27581  else if(temp_sz > 0){
27582  if (params->multi_dimensional){
27583  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27584  For default multidimensional global work size, leave the global_work_size vector empty, \
27585  and set multi_dimensional to true. Setting the global work size based on the values inside \
27586  the global_work_size vector.");
27587  }
27588  if (temp_sz == 1){
27589  size[0] = params->global_work_size[0];
27590  work_dimension = 1;
27591  }
27592  else if (temp_sz == 2){
27593  size[0] = params->global_work_size[0];
27594  size[1] = params->global_work_size[1];
27595  work_dimension = 2;
27596  }
27597  else{
27598  size[0] = params->global_work_size[0];
27599  size[1] = params->global_work_size[1];
27600  size[2] = params->global_work_size[2];
27601  if (temp_sz > 3){
27602  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27603  }
27604  }
27605  }
27606 
27607  cl_event gpuExec;
27608 
27609  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27610 
27611  clWaitForEvents(1, &gpuExec);
27612 
27613  double *result = (double *) malloc(typesz);
27614  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27615 
27616  v->assign(result, result+sz);
27617 
27618  if (typesz2 != typesz or sz != sz2){
27619  int *result2;
27620  result2 = (int *) malloc(typesz2);
27621  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27622 
27623  v2->assign(result2, result2+sz2);
27624  free(result2);
27625  }
27626  else{
27627  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27628 
27629  v2->assign(result, result+sz2);
27630  }
27631 
27632  clReleaseCommandQueue (queue);
27633  clReleaseMemObject(buffer);
27634  clReleaseMemObject(buffer2);
27635  clReleaseMemObject(buffer3);
27636  clReleaseEvent(gpuExec);
27637  free(result);
27638  }
27639 
27640  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
27641  size_t sz = v->size();
27642  size_t sz2 = v2->size();
27643  size_t sz3 = v3->size();
27644  size_t typesz = sizeof(double) * sz;
27645  size_t typesz2 = sizeof(int) * sz2;
27646  size_t typesz3 = sizeof(float) * sz3;
27647  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27648  if (temp_sz > 0){
27649  if (temp_sz > 2){
27650  if (temp_sz > 3){
27651  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27652  }
27653  typesz = sizeof(double) * params->buffers_size[0];
27654  typesz2 = sizeof(int) * params->buffers_size[1];
27655  typesz3 = sizeof(float) * params->buffers_size[2];
27656  }
27657  else{
27658  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27659  }
27660  }
27661  cl_int error = 0;
27662  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27663  checkError(error);
27664  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27665  checkError(error);
27666  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
27667  checkError(error);
27668  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27669  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27670  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27671  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27672 
27673  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27674  checkError (error);
27675  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27676  checkError (error);
27677  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
27678  checkError (error);
27679 
27680  size_t size[3] = {sz, sz2, sz3};
27681  size_t work_dimension = 3;
27682 
27683  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27684  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27685  work_dimension = 1;
27686  }
27687  else if(temp_sz > 0){
27688  if (params->multi_dimensional){
27689  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27690  For default multidimensional global work size, leave the global_work_size vector empty, \
27691  and set multi_dimensional to true. Setting the global work size based on the values inside \
27692  the global_work_size vector.");
27693  }
27694  if (temp_sz == 1){
27695  size[0] = params->global_work_size[0];
27696  work_dimension = 1;
27697  }
27698  else if (temp_sz == 2){
27699  size[0] = params->global_work_size[0];
27700  size[1] = params->global_work_size[1];
27701  work_dimension = 2;
27702  }
27703  else{
27704  size[0] = params->global_work_size[0];
27705  size[1] = params->global_work_size[1];
27706  size[2] = params->global_work_size[2];
27707  if (temp_sz > 3){
27708  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27709  }
27710  }
27711  }
27712 
27713  cl_event gpuExec;
27714 
27715  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27716 
27717  clWaitForEvents(1, &gpuExec);
27718 
27719  double *result = (double *) malloc(typesz);
27720  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27721 
27722  v->assign(result, result+sz);
27723 
27724  if (typesz2 != typesz or sz != sz2){
27725  int *result2;
27726  result2 = (int *) malloc(typesz2);
27727  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27728 
27729  v2->assign(result2, result2+sz2);
27730  free(result2);
27731  }
27732  else{
27733  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27734 
27735  v2->assign(result, result+sz2);
27736  }
27737 
27738  if (typesz3 != typesz or sz != sz3){
27739  float *result3;
27740  result3 = (float *) malloc(typesz3);
27741  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
27742 
27743  v3->assign(result3, result3+sz3);
27744  free(result3);
27745  }
27746  else{
27747  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27748 
27749  v3->assign(result, result+sz3);
27750  }
27751 
27752  clReleaseCommandQueue (queue);
27753  clReleaseMemObject(buffer);
27754  clReleaseMemObject(buffer2);
27755  clReleaseMemObject(buffer3);
27756  clReleaseEvent(gpuExec);
27757  free(result);
27758  }
27759 
27760 
27761  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
27762  size_t sz = v.size();
27763  size_t sz2 = v2.size();
27764  size_t sz3 = v3.size();
27765  size_t typesz = sizeof(double) * sz;
27766  size_t typesz2 = sizeof(int) * sz2;
27767  size_t typesz3 = sizeof(double) * sz3;
27768  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27769  if (temp_sz > 0){
27770  if (temp_sz > 2){
27771  if (temp_sz > 3){
27772  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27773  }
27774  typesz = sizeof(double) * params->buffers_size[0];
27775  typesz2 = sizeof(int) * params->buffers_size[1];
27776  typesz3 = sizeof(double) * params->buffers_size[2];
27777  }
27778  else{
27779  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27780  }
27781  }
27782  cl_int error = 0;
27783  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27784  checkError(error);
27785  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27786  checkError(error);
27787  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27788  checkError(error);
27789  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27790  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27791  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27792  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27793 
27794  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
27795  checkError (error);
27796  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27797  checkError (error);
27798  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27799  checkError (error);
27800 
27801  size_t size[3] = {sz, sz2, sz3};
27802  size_t work_dimension = 3;
27803 
27804  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27805  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27806  work_dimension = 1;
27807  }
27808  else if(temp_sz > 0){
27809  if (params->multi_dimensional){
27810  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27811  For default multidimensional global work size, leave the global_work_size vector empty, \
27812  and set multi_dimensional to true. Setting the global work size based on the values inside \
27813  the global_work_size vector.");
27814  }
27815  if (temp_sz == 1){
27816  size[0] = params->global_work_size[0];
27817  work_dimension = 1;
27818  }
27819  else if (temp_sz == 2){
27820  size[0] = params->global_work_size[0];
27821  size[1] = params->global_work_size[1];
27822  work_dimension = 2;
27823  }
27824  else{
27825  size[0] = params->global_work_size[0];
27826  size[1] = params->global_work_size[1];
27827  size[2] = params->global_work_size[2];
27828  if (temp_sz > 3){
27829  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27830  }
27831  }
27832  }
27833 
27834  cl_event gpuExec;
27835 
27836  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27837 
27838  clWaitForEvents(1, &gpuExec);
27839 
27840  double *result = (double *) malloc(typesz);
27841  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27842 
27843  std::vector<double> res = std::vector<double>();
27844  res.assign(result, result+sz);
27845 
27846  clReleaseCommandQueue (queue);
27847  clReleaseMemObject(buffer);
27848  clReleaseMemObject(buffer2);
27849  clReleaseMemObject(buffer3);
27850  clReleaseEvent(gpuExec);
27851  free(result);
27852 
27853  return res;
27854  }
27855 
27856  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<int> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
27857  size_t sz = v->size();
27858  size_t sz2 = v2.size();
27859  size_t sz3 = v3.size();
27860  size_t typesz = sizeof(double) * sz;
27861  size_t typesz2 = sizeof(int) * sz2;
27862  size_t typesz3 = sizeof(double) * sz3;
27863  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27864  if (temp_sz > 0){
27865  if (temp_sz > 2){
27866  if (temp_sz > 3){
27867  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27868  }
27869  typesz = sizeof(double) * params->buffers_size[0];
27870  typesz2 = sizeof(int) * params->buffers_size[1];
27871  typesz3 = sizeof(double) * params->buffers_size[2];
27872  }
27873  else{
27874  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27875  }
27876  }
27877  cl_int error = 0;
27878  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27879  checkError(error);
27880  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27881  checkError(error);
27882  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27883  checkError(error);
27884  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27885  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27886  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27887  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27888 
27889  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27890  checkError (error);
27891  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27892  checkError (error);
27893  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27894  checkError (error);
27895 
27896  size_t size[3] = {sz, sz2, sz3};
27897  size_t work_dimension = 3;
27898 
27899  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27900  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27901  work_dimension = 1;
27902  }
27903  else if(temp_sz > 0){
27904  if (params->multi_dimensional){
27905  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
27906  For default multidimensional global work size, leave the global_work_size vector empty, \
27907  and set multi_dimensional to true. Setting the global work size based on the values inside \
27908  the global_work_size vector.");
27909  }
27910  if (temp_sz == 1){
27911  size[0] = params->global_work_size[0];
27912  work_dimension = 1;
27913  }
27914  else if (temp_sz == 2){
27915  size[0] = params->global_work_size[0];
27916  size[1] = params->global_work_size[1];
27917  work_dimension = 2;
27918  }
27919  else{
27920  size[0] = params->global_work_size[0];
27921  size[1] = params->global_work_size[1];
27922  size[2] = params->global_work_size[2];
27923  if (temp_sz > 3){
27924  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27925  }
27926  }
27927  }
27928 
27929  cl_event gpuExec;
27930 
27931  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27932 
27933  clWaitForEvents(1, &gpuExec);
27934 
27935  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27936 
27937  clWaitForEvents(1, &gpuExec);
27938 
27939  double *result = (double *) malloc(typesz);
27940  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27941 
27942  v->assign(result, result+sz);
27943 
27944  clReleaseCommandQueue (queue);
27945  clReleaseMemObject(buffer);
27946  clReleaseMemObject(buffer2);
27947  clReleaseMemObject(buffer3);
27948  clReleaseEvent(gpuExec);
27949  free(result);
27950  }
27951 
27952  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
27953  size_t sz = v->size();
27954  size_t sz2 = v2->size();
27955  size_t sz3 = v3.size();
27956  size_t typesz = sizeof(double) * sz;
27957  size_t typesz2 = sizeof(int) * sz2;
27958  size_t typesz3 = sizeof(double) * sz3;
27959  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
27960  if (temp_sz > 0){
27961  if (temp_sz > 2){
27962  if (temp_sz > 3){
27963  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27964  }
27965  typesz = sizeof(double) * params->buffers_size[0];
27966  typesz2 = sizeof(int) * params->buffers_size[1];
27967  typesz3 = sizeof(double) * params->buffers_size[2];
27968  }
27969  else{
27970  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27971  }
27972  }
27973  cl_int error = 0;
27974  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27975  checkError(error);
27976  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27977  checkError(error);
27978  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27979  checkError(error);
27980  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
27981  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
27982  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
27983  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
27984 
27985  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27986  checkError (error);
27987  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27988  checkError (error);
27989  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27990  checkError (error);
27991 
27992  size_t size[3] = {sz, sz2, sz3};
27993  size_t work_dimension = 3;
27994 
27995  temp_sz = params != NULL ? params->global_work_size.size() : 0;
27996  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
27997  work_dimension = 1;
27998  }
27999  else if(temp_sz > 0){
28000  if (params->multi_dimensional){
28001  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28002  For default multidimensional global work size, leave the global_work_size vector empty, \
28003  and set multi_dimensional to true. Setting the global work size based on the values inside \
28004  the global_work_size vector.");
28005  }
28006  if (temp_sz == 1){
28007  size[0] = params->global_work_size[0];
28008  work_dimension = 1;
28009  }
28010  else if (temp_sz == 2){
28011  size[0] = params->global_work_size[0];
28012  size[1] = params->global_work_size[1];
28013  work_dimension = 2;
28014  }
28015  else{
28016  size[0] = params->global_work_size[0];
28017  size[1] = params->global_work_size[1];
28018  size[2] = params->global_work_size[2];
28019  if (temp_sz > 3){
28020  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28021  }
28022  }
28023  }
28024 
28025  cl_event gpuExec;
28026 
28027  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28028 
28029  clWaitForEvents(1, &gpuExec);
28030 
28031  double *result = (double *) malloc(typesz);
28032  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28033 
28034  v->assign(result, result+sz);
28035 
28036  if (typesz2 != typesz or sz != sz2){
28037  int *result2;
28038  result2 = (int *) malloc(typesz2);
28039  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28040 
28041  v2->assign(result2, result2+sz2);
28042  free(result2);
28043  }
28044  else{
28045  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28046 
28047  v2->assign(result, result+sz2);
28048  }
28049 
28050  clReleaseCommandQueue (queue);
28051  clReleaseMemObject(buffer);
28052  clReleaseMemObject(buffer2);
28053  clReleaseMemObject(buffer3);
28054  clReleaseEvent(gpuExec);
28055  free(result);
28056  }
28057 
28058  void ROS_OpenCL::process(std::vector<double>* v, std::vector<int>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
28059  size_t sz = v->size();
28060  size_t sz2 = v2->size();
28061  size_t sz3 = v3->size();
28062  size_t typesz = sizeof(double) * sz;
28063  size_t typesz2 = sizeof(int) * sz2;
28064  size_t typesz3 = sizeof(double) * sz3;
28065  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28066  if (temp_sz > 0){
28067  if (temp_sz > 2){
28068  if (temp_sz > 3){
28069  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28070  }
28071  typesz = sizeof(double) * params->buffers_size[0];
28072  typesz2 = sizeof(int) * params->buffers_size[1];
28073  typesz3 = sizeof(double) * params->buffers_size[2];
28074  }
28075  else{
28076  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28077  }
28078  }
28079  cl_int error = 0;
28080  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28081  checkError(error);
28082  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28083  checkError(error);
28084  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
28085  checkError(error);
28086  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28087  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28088  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28089  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28090 
28091  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28092  checkError (error);
28093  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28094  checkError (error);
28095  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
28096  checkError (error);
28097 
28098  size_t size[3] = {sz, sz2, sz3};
28099  size_t work_dimension = 3;
28100 
28101  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28102  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28103  work_dimension = 1;
28104  }
28105  else if(temp_sz > 0){
28106  if (params->multi_dimensional){
28107  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28108  For default multidimensional global work size, leave the global_work_size vector empty, \
28109  and set multi_dimensional to true. Setting the global work size based on the values inside \
28110  the global_work_size vector.");
28111  }
28112  if (temp_sz == 1){
28113  size[0] = params->global_work_size[0];
28114  work_dimension = 1;
28115  }
28116  else if (temp_sz == 2){
28117  size[0] = params->global_work_size[0];
28118  size[1] = params->global_work_size[1];
28119  work_dimension = 2;
28120  }
28121  else{
28122  size[0] = params->global_work_size[0];
28123  size[1] = params->global_work_size[1];
28124  size[2] = params->global_work_size[2];
28125  if (temp_sz > 3){
28126  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28127  }
28128  }
28129  }
28130 
28131  cl_event gpuExec;
28132 
28133  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28134 
28135  clWaitForEvents(1, &gpuExec);
28136 
28137  double *result = (double *) malloc(typesz);
28138  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28139 
28140  v->assign(result, result+sz);
28141 
28142  if (typesz2 != typesz or sz != sz2){
28143  int *result2;
28144  result2 = (int *) malloc(typesz2);
28145  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28146 
28147  v2->assign(result2, result2+sz2);
28148  free(result2);
28149  }
28150  else{
28151  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28152 
28153  v2->assign(result, result+sz2);
28154  }
28155 
28156  if (typesz3 != typesz or sz != sz3){
28157  double *result3;
28158  result3 = (double *) malloc(typesz3);
28159  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28160 
28161  v3->assign(result3, result3+sz3);
28162  free(result3);
28163  }
28164  else{
28165  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28166 
28167  v3->assign(result, result+sz3);
28168  }
28169 
28170  clReleaseCommandQueue (queue);
28171  clReleaseMemObject(buffer);
28172  clReleaseMemObject(buffer2);
28173  clReleaseMemObject(buffer3);
28174  clReleaseEvent(gpuExec);
28175  free(result);
28176  }
28177 
28178 
28179  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
28180  size_t sz = v.size();
28181  size_t sz2 = v2.size();
28182  size_t sz3 = v3.size();
28183  size_t typesz = sizeof(double) * sz;
28184  size_t typesz2 = sizeof(float) * sz2;
28185  size_t typesz3 = sizeof(char) * sz3;
28186  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28187  if (temp_sz > 0){
28188  if (temp_sz > 2){
28189  if (temp_sz > 3){
28190  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28191  }
28192  typesz = sizeof(double) * params->buffers_size[0];
28193  typesz2 = sizeof(float) * params->buffers_size[1];
28194  typesz3 = sizeof(char) * params->buffers_size[2];
28195  }
28196  else{
28197  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28198  }
28199  }
28200  cl_int error = 0;
28201  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28202  checkError(error);
28203  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28204  checkError(error);
28205  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28206  checkError(error);
28207  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28208  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28209  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28210  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28211 
28212  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
28213  checkError (error);
28214  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28215  checkError (error);
28216  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28217  checkError (error);
28218 
28219  size_t size[3] = {sz, sz2, sz3};
28220  size_t work_dimension = 3;
28221 
28222  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28223  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28224  work_dimension = 1;
28225  }
28226  else if(temp_sz > 0){
28227  if (params->multi_dimensional){
28228  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28229  For default multidimensional global work size, leave the global_work_size vector empty, \
28230  and set multi_dimensional to true. Setting the global work size based on the values inside \
28231  the global_work_size vector.");
28232  }
28233  if (temp_sz == 1){
28234  size[0] = params->global_work_size[0];
28235  work_dimension = 1;
28236  }
28237  else if (temp_sz == 2){
28238  size[0] = params->global_work_size[0];
28239  size[1] = params->global_work_size[1];
28240  work_dimension = 2;
28241  }
28242  else{
28243  size[0] = params->global_work_size[0];
28244  size[1] = params->global_work_size[1];
28245  size[2] = params->global_work_size[2];
28246  if (temp_sz > 3){
28247  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28248  }
28249  }
28250  }
28251 
28252  cl_event gpuExec;
28253 
28254  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28255 
28256  clWaitForEvents(1, &gpuExec);
28257 
28258  double *result = (double *) malloc(typesz);
28259  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28260 
28261  std::vector<double> res = std::vector<double>();
28262  res.assign(result, result+sz);
28263 
28264  clReleaseCommandQueue (queue);
28265  clReleaseMemObject(buffer);
28266  clReleaseMemObject(buffer2);
28267  clReleaseMemObject(buffer3);
28268  clReleaseEvent(gpuExec);
28269  free(result);
28270 
28271  return res;
28272  }
28273 
28274  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<float> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
28275  size_t sz = v->size();
28276  size_t sz2 = v2.size();
28277  size_t sz3 = v3.size();
28278  size_t typesz = sizeof(double) * sz;
28279  size_t typesz2 = sizeof(float) * sz2;
28280  size_t typesz3 = sizeof(char) * sz3;
28281  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28282  if (temp_sz > 0){
28283  if (temp_sz > 2){
28284  if (temp_sz > 3){
28285  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28286  }
28287  typesz = sizeof(double) * params->buffers_size[0];
28288  typesz2 = sizeof(float) * params->buffers_size[1];
28289  typesz3 = sizeof(char) * params->buffers_size[2];
28290  }
28291  else{
28292  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28293  }
28294  }
28295  cl_int error = 0;
28296  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28297  checkError(error);
28298  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28299  checkError(error);
28300  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28301  checkError(error);
28302  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28303  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28304  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28305  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28306 
28307  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28308  checkError (error);
28309  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28310  checkError (error);
28311  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28312  checkError (error);
28313 
28314  size_t size[3] = {sz, sz2, sz3};
28315  size_t work_dimension = 3;
28316 
28317  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28318  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28319  work_dimension = 1;
28320  }
28321  else if(temp_sz > 0){
28322  if (params->multi_dimensional){
28323  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28324  For default multidimensional global work size, leave the global_work_size vector empty, \
28325  and set multi_dimensional to true. Setting the global work size based on the values inside \
28326  the global_work_size vector.");
28327  }
28328  if (temp_sz == 1){
28329  size[0] = params->global_work_size[0];
28330  work_dimension = 1;
28331  }
28332  else if (temp_sz == 2){
28333  size[0] = params->global_work_size[0];
28334  size[1] = params->global_work_size[1];
28335  work_dimension = 2;
28336  }
28337  else{
28338  size[0] = params->global_work_size[0];
28339  size[1] = params->global_work_size[1];
28340  size[2] = params->global_work_size[2];
28341  if (temp_sz > 3){
28342  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28343  }
28344  }
28345  }
28346 
28347  cl_event gpuExec;
28348 
28349  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28350 
28351  clWaitForEvents(1, &gpuExec);
28352 
28353  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28354 
28355  clWaitForEvents(1, &gpuExec);
28356 
28357  double *result = (double *) malloc(typesz);
28358  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28359 
28360  v->assign(result, result+sz);
28361 
28362  clReleaseCommandQueue (queue);
28363  clReleaseMemObject(buffer);
28364  clReleaseMemObject(buffer2);
28365  clReleaseMemObject(buffer3);
28366  clReleaseEvent(gpuExec);
28367  free(result);
28368  }
28369 
28370  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
28371  size_t sz = v->size();
28372  size_t sz2 = v2->size();
28373  size_t sz3 = v3.size();
28374  size_t typesz = sizeof(double) * sz;
28375  size_t typesz2 = sizeof(float) * sz2;
28376  size_t typesz3 = sizeof(char) * sz3;
28377  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28378  if (temp_sz > 0){
28379  if (temp_sz > 2){
28380  if (temp_sz > 3){
28381  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28382  }
28383  typesz = sizeof(double) * params->buffers_size[0];
28384  typesz2 = sizeof(float) * params->buffers_size[1];
28385  typesz3 = sizeof(char) * params->buffers_size[2];
28386  }
28387  else{
28388  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28389  }
28390  }
28391  cl_int error = 0;
28392  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28393  checkError(error);
28394  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28395  checkError(error);
28396  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28397  checkError(error);
28398  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28399  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28400  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28401  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28402 
28403  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28404  checkError (error);
28405  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28406  checkError (error);
28407  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28408  checkError (error);
28409 
28410  size_t size[3] = {sz, sz2, sz3};
28411  size_t work_dimension = 3;
28412 
28413  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28414  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28415  work_dimension = 1;
28416  }
28417  else if(temp_sz > 0){
28418  if (params->multi_dimensional){
28419  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28420  For default multidimensional global work size, leave the global_work_size vector empty, \
28421  and set multi_dimensional to true. Setting the global work size based on the values inside \
28422  the global_work_size vector.");
28423  }
28424  if (temp_sz == 1){
28425  size[0] = params->global_work_size[0];
28426  work_dimension = 1;
28427  }
28428  else if (temp_sz == 2){
28429  size[0] = params->global_work_size[0];
28430  size[1] = params->global_work_size[1];
28431  work_dimension = 2;
28432  }
28433  else{
28434  size[0] = params->global_work_size[0];
28435  size[1] = params->global_work_size[1];
28436  size[2] = params->global_work_size[2];
28437  if (temp_sz > 3){
28438  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28439  }
28440  }
28441  }
28442 
28443  cl_event gpuExec;
28444 
28445  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28446 
28447  clWaitForEvents(1, &gpuExec);
28448 
28449  double *result = (double *) malloc(typesz);
28450  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28451 
28452  v->assign(result, result+sz);
28453 
28454  if (typesz2 != typesz or sz != sz2){
28455  float *result2;
28456  result2 = (float *) malloc(typesz2);
28457  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28458 
28459  v2->assign(result2, result2+sz2);
28460  free(result2);
28461  }
28462  else{
28463  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28464 
28465  v2->assign(result, result+sz2);
28466  }
28467 
28468  clReleaseCommandQueue (queue);
28469  clReleaseMemObject(buffer);
28470  clReleaseMemObject(buffer2);
28471  clReleaseMemObject(buffer3);
28472  clReleaseEvent(gpuExec);
28473  free(result);
28474  }
28475 
28476  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
28477  size_t sz = v->size();
28478  size_t sz2 = v2->size();
28479  size_t sz3 = v3->size();
28480  size_t typesz = sizeof(double) * sz;
28481  size_t typesz2 = sizeof(float) * sz2;
28482  size_t typesz3 = sizeof(char) * sz3;
28483  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28484  if (temp_sz > 0){
28485  if (temp_sz > 2){
28486  if (temp_sz > 3){
28487  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28488  }
28489  typesz = sizeof(double) * params->buffers_size[0];
28490  typesz2 = sizeof(float) * params->buffers_size[1];
28491  typesz3 = sizeof(char) * params->buffers_size[2];
28492  }
28493  else{
28494  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28495  }
28496  }
28497  cl_int error = 0;
28498  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28499  checkError(error);
28500  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28501  checkError(error);
28502  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
28503  checkError(error);
28504  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28505  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28506  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28507  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28508 
28509  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28510  checkError (error);
28511  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28512  checkError (error);
28513  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
28514  checkError (error);
28515 
28516  size_t size[3] = {sz, sz2, sz3};
28517  size_t work_dimension = 3;
28518 
28519  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28520  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28521  work_dimension = 1;
28522  }
28523  else if(temp_sz > 0){
28524  if (params->multi_dimensional){
28525  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28526  For default multidimensional global work size, leave the global_work_size vector empty, \
28527  and set multi_dimensional to true. Setting the global work size based on the values inside \
28528  the global_work_size vector.");
28529  }
28530  if (temp_sz == 1){
28531  size[0] = params->global_work_size[0];
28532  work_dimension = 1;
28533  }
28534  else if (temp_sz == 2){
28535  size[0] = params->global_work_size[0];
28536  size[1] = params->global_work_size[1];
28537  work_dimension = 2;
28538  }
28539  else{
28540  size[0] = params->global_work_size[0];
28541  size[1] = params->global_work_size[1];
28542  size[2] = params->global_work_size[2];
28543  if (temp_sz > 3){
28544  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28545  }
28546  }
28547  }
28548 
28549  cl_event gpuExec;
28550 
28551  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28552 
28553  clWaitForEvents(1, &gpuExec);
28554 
28555  double *result = (double *) malloc(typesz);
28556  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28557 
28558  v->assign(result, result+sz);
28559 
28560  if (typesz2 != typesz or sz != sz2){
28561  float *result2;
28562  result2 = (float *) malloc(typesz2);
28563  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28564 
28565  v2->assign(result2, result2+sz2);
28566  free(result2);
28567  }
28568  else{
28569  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28570 
28571  v2->assign(result, result+sz2);
28572  }
28573 
28574  if (typesz3 != typesz or sz != sz3){
28575  char *result3;
28576  result3 = (char *) malloc(typesz3);
28577  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28578 
28579  v3->assign(result3, result3+sz3);
28580  free(result3);
28581  }
28582  else{
28583  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28584 
28585  v3->assign(result, result+sz3);
28586  }
28587 
28588  clReleaseCommandQueue (queue);
28589  clReleaseMemObject(buffer);
28590  clReleaseMemObject(buffer2);
28591  clReleaseMemObject(buffer3);
28592  clReleaseEvent(gpuExec);
28593  free(result);
28594  }
28595 
28596 
28597  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
28598  size_t sz = v.size();
28599  size_t sz2 = v2.size();
28600  size_t sz3 = v3.size();
28601  size_t typesz = sizeof(double) * sz;
28602  size_t typesz2 = sizeof(float) * sz2;
28603  size_t typesz3 = sizeof(int) * sz3;
28604  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28605  if (temp_sz > 0){
28606  if (temp_sz > 2){
28607  if (temp_sz > 3){
28608  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28609  }
28610  typesz = sizeof(double) * params->buffers_size[0];
28611  typesz2 = sizeof(float) * params->buffers_size[1];
28612  typesz3 = sizeof(int) * params->buffers_size[2];
28613  }
28614  else{
28615  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28616  }
28617  }
28618  cl_int error = 0;
28619  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28620  checkError(error);
28621  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28622  checkError(error);
28623  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28624  checkError(error);
28625  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28626  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28627  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28628  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28629 
28630  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
28631  checkError (error);
28632  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28633  checkError (error);
28634  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28635  checkError (error);
28636 
28637  size_t size[3] = {sz, sz2, sz3};
28638  size_t work_dimension = 3;
28639 
28640  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28641  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28642  work_dimension = 1;
28643  }
28644  else if(temp_sz > 0){
28645  if (params->multi_dimensional){
28646  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28647  For default multidimensional global work size, leave the global_work_size vector empty, \
28648  and set multi_dimensional to true. Setting the global work size based on the values inside \
28649  the global_work_size vector.");
28650  }
28651  if (temp_sz == 1){
28652  size[0] = params->global_work_size[0];
28653  work_dimension = 1;
28654  }
28655  else if (temp_sz == 2){
28656  size[0] = params->global_work_size[0];
28657  size[1] = params->global_work_size[1];
28658  work_dimension = 2;
28659  }
28660  else{
28661  size[0] = params->global_work_size[0];
28662  size[1] = params->global_work_size[1];
28663  size[2] = params->global_work_size[2];
28664  if (temp_sz > 3){
28665  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28666  }
28667  }
28668  }
28669 
28670  cl_event gpuExec;
28671 
28672  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28673 
28674  clWaitForEvents(1, &gpuExec);
28675 
28676  double *result = (double *) malloc(typesz);
28677  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28678 
28679  std::vector<double> res = std::vector<double>();
28680  res.assign(result, result+sz);
28681 
28682  clReleaseCommandQueue (queue);
28683  clReleaseMemObject(buffer);
28684  clReleaseMemObject(buffer2);
28685  clReleaseMemObject(buffer3);
28686  clReleaseEvent(gpuExec);
28687  free(result);
28688 
28689  return res;
28690  }
28691 
28692  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<float> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
28693  size_t sz = v->size();
28694  size_t sz2 = v2.size();
28695  size_t sz3 = v3.size();
28696  size_t typesz = sizeof(double) * sz;
28697  size_t typesz2 = sizeof(float) * sz2;
28698  size_t typesz3 = sizeof(int) * sz3;
28699  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28700  if (temp_sz > 0){
28701  if (temp_sz > 2){
28702  if (temp_sz > 3){
28703  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28704  }
28705  typesz = sizeof(double) * params->buffers_size[0];
28706  typesz2 = sizeof(float) * params->buffers_size[1];
28707  typesz3 = sizeof(int) * params->buffers_size[2];
28708  }
28709  else{
28710  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28711  }
28712  }
28713  cl_int error = 0;
28714  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28715  checkError(error);
28716  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28717  checkError(error);
28718  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28719  checkError(error);
28720  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28721  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28722  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28723  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28724 
28725  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28726  checkError (error);
28727  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28728  checkError (error);
28729  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28730  checkError (error);
28731 
28732  size_t size[3] = {sz, sz2, sz3};
28733  size_t work_dimension = 3;
28734 
28735  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28736  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28737  work_dimension = 1;
28738  }
28739  else if(temp_sz > 0){
28740  if (params->multi_dimensional){
28741  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28742  For default multidimensional global work size, leave the global_work_size vector empty, \
28743  and set multi_dimensional to true. Setting the global work size based on the values inside \
28744  the global_work_size vector.");
28745  }
28746  if (temp_sz == 1){
28747  size[0] = params->global_work_size[0];
28748  work_dimension = 1;
28749  }
28750  else if (temp_sz == 2){
28751  size[0] = params->global_work_size[0];
28752  size[1] = params->global_work_size[1];
28753  work_dimension = 2;
28754  }
28755  else{
28756  size[0] = params->global_work_size[0];
28757  size[1] = params->global_work_size[1];
28758  size[2] = params->global_work_size[2];
28759  if (temp_sz > 3){
28760  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28761  }
28762  }
28763  }
28764 
28765  cl_event gpuExec;
28766 
28767  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28768 
28769  clWaitForEvents(1, &gpuExec);
28770 
28771  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28772 
28773  clWaitForEvents(1, &gpuExec);
28774 
28775  double *result = (double *) malloc(typesz);
28776  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28777 
28778  v->assign(result, result+sz);
28779 
28780  clReleaseCommandQueue (queue);
28781  clReleaseMemObject(buffer);
28782  clReleaseMemObject(buffer2);
28783  clReleaseMemObject(buffer3);
28784  clReleaseEvent(gpuExec);
28785  free(result);
28786  }
28787 
28788  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
28789  size_t sz = v->size();
28790  size_t sz2 = v2->size();
28791  size_t sz3 = v3.size();
28792  size_t typesz = sizeof(double) * sz;
28793  size_t typesz2 = sizeof(float) * sz2;
28794  size_t typesz3 = sizeof(int) * sz3;
28795  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28796  if (temp_sz > 0){
28797  if (temp_sz > 2){
28798  if (temp_sz > 3){
28799  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28800  }
28801  typesz = sizeof(double) * params->buffers_size[0];
28802  typesz2 = sizeof(float) * params->buffers_size[1];
28803  typesz3 = sizeof(int) * params->buffers_size[2];
28804  }
28805  else{
28806  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28807  }
28808  }
28809  cl_int error = 0;
28810  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28811  checkError(error);
28812  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28813  checkError(error);
28814  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
28815  checkError(error);
28816  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28817  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28818  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28819  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28820 
28821  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28822  checkError (error);
28823  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28824  checkError (error);
28825  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
28826  checkError (error);
28827 
28828  size_t size[3] = {sz, sz2, sz3};
28829  size_t work_dimension = 3;
28830 
28831  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28832  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28833  work_dimension = 1;
28834  }
28835  else if(temp_sz > 0){
28836  if (params->multi_dimensional){
28837  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28838  For default multidimensional global work size, leave the global_work_size vector empty, \
28839  and set multi_dimensional to true. Setting the global work size based on the values inside \
28840  the global_work_size vector.");
28841  }
28842  if (temp_sz == 1){
28843  size[0] = params->global_work_size[0];
28844  work_dimension = 1;
28845  }
28846  else if (temp_sz == 2){
28847  size[0] = params->global_work_size[0];
28848  size[1] = params->global_work_size[1];
28849  work_dimension = 2;
28850  }
28851  else{
28852  size[0] = params->global_work_size[0];
28853  size[1] = params->global_work_size[1];
28854  size[2] = params->global_work_size[2];
28855  if (temp_sz > 3){
28856  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28857  }
28858  }
28859  }
28860 
28861  cl_event gpuExec;
28862 
28863  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28864 
28865  clWaitForEvents(1, &gpuExec);
28866 
28867  double *result = (double *) malloc(typesz);
28868  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28869 
28870  v->assign(result, result+sz);
28871 
28872  if (typesz2 != typesz or sz != sz2){
28873  float *result2;
28874  result2 = (float *) malloc(typesz2);
28875  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28876 
28877  v2->assign(result2, result2+sz2);
28878  free(result2);
28879  }
28880  else{
28881  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28882 
28883  v2->assign(result, result+sz2);
28884  }
28885 
28886  clReleaseCommandQueue (queue);
28887  clReleaseMemObject(buffer);
28888  clReleaseMemObject(buffer2);
28889  clReleaseMemObject(buffer3);
28890  clReleaseEvent(gpuExec);
28891  free(result);
28892  }
28893 
28894  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
28895  size_t sz = v->size();
28896  size_t sz2 = v2->size();
28897  size_t sz3 = v3->size();
28898  size_t typesz = sizeof(double) * sz;
28899  size_t typesz2 = sizeof(float) * sz2;
28900  size_t typesz3 = sizeof(int) * sz3;
28901  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
28902  if (temp_sz > 0){
28903  if (temp_sz > 2){
28904  if (temp_sz > 3){
28905  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28906  }
28907  typesz = sizeof(double) * params->buffers_size[0];
28908  typesz2 = sizeof(float) * params->buffers_size[1];
28909  typesz3 = sizeof(int) * params->buffers_size[2];
28910  }
28911  else{
28912  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
28913  }
28914  }
28915  cl_int error = 0;
28916  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28917  checkError(error);
28918  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28919  checkError(error);
28920  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
28921  checkError(error);
28922  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
28923  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
28924  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
28925  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
28926 
28927  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28928  checkError (error);
28929  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28930  checkError (error);
28931  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
28932  checkError (error);
28933 
28934  size_t size[3] = {sz, sz2, sz3};
28935  size_t work_dimension = 3;
28936 
28937  temp_sz = params != NULL ? params->global_work_size.size() : 0;
28938  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
28939  work_dimension = 1;
28940  }
28941  else if(temp_sz > 0){
28942  if (params->multi_dimensional){
28943  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
28944  For default multidimensional global work size, leave the global_work_size vector empty, \
28945  and set multi_dimensional to true. Setting the global work size based on the values inside \
28946  the global_work_size vector.");
28947  }
28948  if (temp_sz == 1){
28949  size[0] = params->global_work_size[0];
28950  work_dimension = 1;
28951  }
28952  else if (temp_sz == 2){
28953  size[0] = params->global_work_size[0];
28954  size[1] = params->global_work_size[1];
28955  work_dimension = 2;
28956  }
28957  else{
28958  size[0] = params->global_work_size[0];
28959  size[1] = params->global_work_size[1];
28960  size[2] = params->global_work_size[2];
28961  if (temp_sz > 3){
28962  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
28963  }
28964  }
28965  }
28966 
28967  cl_event gpuExec;
28968 
28969  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28970 
28971  clWaitForEvents(1, &gpuExec);
28972 
28973  double *result = (double *) malloc(typesz);
28974  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28975 
28976  v->assign(result, result+sz);
28977 
28978  if (typesz2 != typesz or sz != sz2){
28979  float *result2;
28980  result2 = (float *) malloc(typesz2);
28981  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28982 
28983  v2->assign(result2, result2+sz2);
28984  free(result2);
28985  }
28986  else{
28987  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28988 
28989  v2->assign(result, result+sz2);
28990  }
28991 
28992  if (typesz3 != typesz or sz != sz3){
28993  int *result3;
28994  result3 = (int *) malloc(typesz3);
28995  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28996 
28997  v3->assign(result3, result3+sz3);
28998  free(result3);
28999  }
29000  else{
29001  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29002 
29003  v3->assign(result, result+sz3);
29004  }
29005 
29006  clReleaseCommandQueue (queue);
29007  clReleaseMemObject(buffer);
29008  clReleaseMemObject(buffer2);
29009  clReleaseMemObject(buffer3);
29010  clReleaseEvent(gpuExec);
29011  free(result);
29012  }
29013 
29014 
29015  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
29016  size_t sz = v.size();
29017  size_t sz2 = v2.size();
29018  size_t sz3 = v3.size();
29019  size_t typesz = sizeof(double) * sz;
29020  size_t typesz2 = sizeof(float) * sz2;
29021  size_t typesz3 = sizeof(float) * sz3;
29022  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29023  if (temp_sz > 0){
29024  if (temp_sz > 2){
29025  if (temp_sz > 3){
29026  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29027  }
29028  typesz = sizeof(double) * params->buffers_size[0];
29029  typesz2 = sizeof(float) * params->buffers_size[1];
29030  typesz3 = sizeof(float) * params->buffers_size[2];
29031  }
29032  else{
29033  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29034  }
29035  }
29036  cl_int error = 0;
29037  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29038  checkError(error);
29039  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29040  checkError(error);
29041  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29042  checkError(error);
29043  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29044  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29045  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29046  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29047 
29048  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29049  checkError (error);
29050  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29051  checkError (error);
29052  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29053  checkError (error);
29054 
29055  size_t size[3] = {sz, sz2, sz3};
29056  size_t work_dimension = 3;
29057 
29058  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29059  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29060  work_dimension = 1;
29061  }
29062  else if(temp_sz > 0){
29063  if (params->multi_dimensional){
29064  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29065  For default multidimensional global work size, leave the global_work_size vector empty, \
29066  and set multi_dimensional to true. Setting the global work size based on the values inside \
29067  the global_work_size vector.");
29068  }
29069  if (temp_sz == 1){
29070  size[0] = params->global_work_size[0];
29071  work_dimension = 1;
29072  }
29073  else if (temp_sz == 2){
29074  size[0] = params->global_work_size[0];
29075  size[1] = params->global_work_size[1];
29076  work_dimension = 2;
29077  }
29078  else{
29079  size[0] = params->global_work_size[0];
29080  size[1] = params->global_work_size[1];
29081  size[2] = params->global_work_size[2];
29082  if (temp_sz > 3){
29083  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29084  }
29085  }
29086  }
29087 
29088  cl_event gpuExec;
29089 
29090  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29091 
29092  clWaitForEvents(1, &gpuExec);
29093 
29094  double *result = (double *) malloc(typesz);
29095  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29096 
29097  std::vector<double> res = std::vector<double>();
29098  res.assign(result, result+sz);
29099 
29100  clReleaseCommandQueue (queue);
29101  clReleaseMemObject(buffer);
29102  clReleaseMemObject(buffer2);
29103  clReleaseMemObject(buffer3);
29104  clReleaseEvent(gpuExec);
29105  free(result);
29106 
29107  return res;
29108  }
29109 
29110  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<float> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
29111  size_t sz = v->size();
29112  size_t sz2 = v2.size();
29113  size_t sz3 = v3.size();
29114  size_t typesz = sizeof(double) * sz;
29115  size_t typesz2 = sizeof(float) * sz2;
29116  size_t typesz3 = sizeof(float) * sz3;
29117  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29118  if (temp_sz > 0){
29119  if (temp_sz > 2){
29120  if (temp_sz > 3){
29121  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29122  }
29123  typesz = sizeof(double) * params->buffers_size[0];
29124  typesz2 = sizeof(float) * params->buffers_size[1];
29125  typesz3 = sizeof(float) * params->buffers_size[2];
29126  }
29127  else{
29128  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29129  }
29130  }
29131  cl_int error = 0;
29132  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29133  checkError(error);
29134  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29135  checkError(error);
29136  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29137  checkError(error);
29138  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29139  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29140  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29141  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29142 
29143  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29144  checkError (error);
29145  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29146  checkError (error);
29147  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29148  checkError (error);
29149 
29150  size_t size[3] = {sz, sz2, sz3};
29151  size_t work_dimension = 3;
29152 
29153  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29154  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29155  work_dimension = 1;
29156  }
29157  else if(temp_sz > 0){
29158  if (params->multi_dimensional){
29159  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29160  For default multidimensional global work size, leave the global_work_size vector empty, \
29161  and set multi_dimensional to true. Setting the global work size based on the values inside \
29162  the global_work_size vector.");
29163  }
29164  if (temp_sz == 1){
29165  size[0] = params->global_work_size[0];
29166  work_dimension = 1;
29167  }
29168  else if (temp_sz == 2){
29169  size[0] = params->global_work_size[0];
29170  size[1] = params->global_work_size[1];
29171  work_dimension = 2;
29172  }
29173  else{
29174  size[0] = params->global_work_size[0];
29175  size[1] = params->global_work_size[1];
29176  size[2] = params->global_work_size[2];
29177  if (temp_sz > 3){
29178  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29179  }
29180  }
29181  }
29182 
29183  cl_event gpuExec;
29184 
29185  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29186 
29187  clWaitForEvents(1, &gpuExec);
29188 
29189  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29190 
29191  clWaitForEvents(1, &gpuExec);
29192 
29193  double *result = (double *) malloc(typesz);
29194  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29195 
29196  v->assign(result, result+sz);
29197 
29198  clReleaseCommandQueue (queue);
29199  clReleaseMemObject(buffer);
29200  clReleaseMemObject(buffer2);
29201  clReleaseMemObject(buffer3);
29202  clReleaseEvent(gpuExec);
29203  free(result);
29204  }
29205 
29206  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
29207  size_t sz = v->size();
29208  size_t sz2 = v2->size();
29209  size_t sz3 = v3.size();
29210  size_t typesz = sizeof(double) * sz;
29211  size_t typesz2 = sizeof(float) * sz2;
29212  size_t typesz3 = sizeof(float) * sz3;
29213  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29214  if (temp_sz > 0){
29215  if (temp_sz > 2){
29216  if (temp_sz > 3){
29217  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29218  }
29219  typesz = sizeof(double) * params->buffers_size[0];
29220  typesz2 = sizeof(float) * params->buffers_size[1];
29221  typesz3 = sizeof(float) * params->buffers_size[2];
29222  }
29223  else{
29224  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29225  }
29226  }
29227  cl_int error = 0;
29228  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29229  checkError(error);
29230  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29231  checkError(error);
29232  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29233  checkError(error);
29234  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29235  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29236  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29237  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29238 
29239  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29240  checkError (error);
29241  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29242  checkError (error);
29243  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29244  checkError (error);
29245 
29246  size_t size[3] = {sz, sz2, sz3};
29247  size_t work_dimension = 3;
29248 
29249  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29250  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29251  work_dimension = 1;
29252  }
29253  else if(temp_sz > 0){
29254  if (params->multi_dimensional){
29255  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29256  For default multidimensional global work size, leave the global_work_size vector empty, \
29257  and set multi_dimensional to true. Setting the global work size based on the values inside \
29258  the global_work_size vector.");
29259  }
29260  if (temp_sz == 1){
29261  size[0] = params->global_work_size[0];
29262  work_dimension = 1;
29263  }
29264  else if (temp_sz == 2){
29265  size[0] = params->global_work_size[0];
29266  size[1] = params->global_work_size[1];
29267  work_dimension = 2;
29268  }
29269  else{
29270  size[0] = params->global_work_size[0];
29271  size[1] = params->global_work_size[1];
29272  size[2] = params->global_work_size[2];
29273  if (temp_sz > 3){
29274  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29275  }
29276  }
29277  }
29278 
29279  cl_event gpuExec;
29280 
29281  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29282 
29283  clWaitForEvents(1, &gpuExec);
29284 
29285  double *result = (double *) malloc(typesz);
29286  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29287 
29288  v->assign(result, result+sz);
29289 
29290  if (typesz2 != typesz or sz != sz2){
29291  float *result2;
29292  result2 = (float *) malloc(typesz2);
29293  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29294 
29295  v2->assign(result2, result2+sz2);
29296  free(result2);
29297  }
29298  else{
29299  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29300 
29301  v2->assign(result, result+sz2);
29302  }
29303 
29304  clReleaseCommandQueue (queue);
29305  clReleaseMemObject(buffer);
29306  clReleaseMemObject(buffer2);
29307  clReleaseMemObject(buffer3);
29308  clReleaseEvent(gpuExec);
29309  free(result);
29310  }
29311 
29312  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
29313  size_t sz = v->size();
29314  size_t sz2 = v2->size();
29315  size_t sz3 = v3->size();
29316  size_t typesz = sizeof(double) * sz;
29317  size_t typesz2 = sizeof(float) * sz2;
29318  size_t typesz3 = sizeof(float) * sz3;
29319  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29320  if (temp_sz > 0){
29321  if (temp_sz > 2){
29322  if (temp_sz > 3){
29323  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29324  }
29325  typesz = sizeof(double) * params->buffers_size[0];
29326  typesz2 = sizeof(float) * params->buffers_size[1];
29327  typesz3 = sizeof(float) * params->buffers_size[2];
29328  }
29329  else{
29330  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29331  }
29332  }
29333  cl_int error = 0;
29334  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29335  checkError(error);
29336  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29337  checkError(error);
29338  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
29339  checkError(error);
29340  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29341  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29342  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29343  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29344 
29345  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29346  checkError (error);
29347  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29348  checkError (error);
29349  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
29350  checkError (error);
29351 
29352  size_t size[3] = {sz, sz2, sz3};
29353  size_t work_dimension = 3;
29354 
29355  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29356  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29357  work_dimension = 1;
29358  }
29359  else if(temp_sz > 0){
29360  if (params->multi_dimensional){
29361  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29362  For default multidimensional global work size, leave the global_work_size vector empty, \
29363  and set multi_dimensional to true. Setting the global work size based on the values inside \
29364  the global_work_size vector.");
29365  }
29366  if (temp_sz == 1){
29367  size[0] = params->global_work_size[0];
29368  work_dimension = 1;
29369  }
29370  else if (temp_sz == 2){
29371  size[0] = params->global_work_size[0];
29372  size[1] = params->global_work_size[1];
29373  work_dimension = 2;
29374  }
29375  else{
29376  size[0] = params->global_work_size[0];
29377  size[1] = params->global_work_size[1];
29378  size[2] = params->global_work_size[2];
29379  if (temp_sz > 3){
29380  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29381  }
29382  }
29383  }
29384 
29385  cl_event gpuExec;
29386 
29387  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29388 
29389  clWaitForEvents(1, &gpuExec);
29390 
29391  double *result = (double *) malloc(typesz);
29392  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29393 
29394  v->assign(result, result+sz);
29395 
29396  if (typesz2 != typesz or sz != sz2){
29397  float *result2;
29398  result2 = (float *) malloc(typesz2);
29399  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29400 
29401  v2->assign(result2, result2+sz2);
29402  free(result2);
29403  }
29404  else{
29405  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29406 
29407  v2->assign(result, result+sz2);
29408  }
29409 
29410  if (typesz3 != typesz or sz != sz3){
29411  float *result3;
29412  result3 = (float *) malloc(typesz3);
29413  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
29414 
29415  v3->assign(result3, result3+sz3);
29416  free(result3);
29417  }
29418  else{
29419  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29420 
29421  v3->assign(result, result+sz3);
29422  }
29423 
29424  clReleaseCommandQueue (queue);
29425  clReleaseMemObject(buffer);
29426  clReleaseMemObject(buffer2);
29427  clReleaseMemObject(buffer3);
29428  clReleaseEvent(gpuExec);
29429  free(result);
29430  }
29431 
29432 
29433  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
29434  size_t sz = v.size();
29435  size_t sz2 = v2.size();
29436  size_t sz3 = v3.size();
29437  size_t typesz = sizeof(double) * sz;
29438  size_t typesz2 = sizeof(float) * sz2;
29439  size_t typesz3 = sizeof(double) * sz3;
29440  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29441  if (temp_sz > 0){
29442  if (temp_sz > 2){
29443  if (temp_sz > 3){
29444  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29445  }
29446  typesz = sizeof(double) * params->buffers_size[0];
29447  typesz2 = sizeof(float) * params->buffers_size[1];
29448  typesz3 = sizeof(double) * params->buffers_size[2];
29449  }
29450  else{
29451  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29452  }
29453  }
29454  cl_int error = 0;
29455  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29456  checkError(error);
29457  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29458  checkError(error);
29459  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29460  checkError(error);
29461  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29462  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29463  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29464  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29465 
29466  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29467  checkError (error);
29468  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29469  checkError (error);
29470  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29471  checkError (error);
29472 
29473  size_t size[3] = {sz, sz2, sz3};
29474  size_t work_dimension = 3;
29475 
29476  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29477  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29478  work_dimension = 1;
29479  }
29480  else if(temp_sz > 0){
29481  if (params->multi_dimensional){
29482  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29483  For default multidimensional global work size, leave the global_work_size vector empty, \
29484  and set multi_dimensional to true. Setting the global work size based on the values inside \
29485  the global_work_size vector.");
29486  }
29487  if (temp_sz == 1){
29488  size[0] = params->global_work_size[0];
29489  work_dimension = 1;
29490  }
29491  else if (temp_sz == 2){
29492  size[0] = params->global_work_size[0];
29493  size[1] = params->global_work_size[1];
29494  work_dimension = 2;
29495  }
29496  else{
29497  size[0] = params->global_work_size[0];
29498  size[1] = params->global_work_size[1];
29499  size[2] = params->global_work_size[2];
29500  if (temp_sz > 3){
29501  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29502  }
29503  }
29504  }
29505 
29506  cl_event gpuExec;
29507 
29508  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29509 
29510  clWaitForEvents(1, &gpuExec);
29511 
29512  double *result = (double *) malloc(typesz);
29513  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29514 
29515  std::vector<double> res = std::vector<double>();
29516  res.assign(result, result+sz);
29517 
29518  clReleaseCommandQueue (queue);
29519  clReleaseMemObject(buffer);
29520  clReleaseMemObject(buffer2);
29521  clReleaseMemObject(buffer3);
29522  clReleaseEvent(gpuExec);
29523  free(result);
29524 
29525  return res;
29526  }
29527 
29528  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<float> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
29529  size_t sz = v->size();
29530  size_t sz2 = v2.size();
29531  size_t sz3 = v3.size();
29532  size_t typesz = sizeof(double) * sz;
29533  size_t typesz2 = sizeof(float) * sz2;
29534  size_t typesz3 = sizeof(double) * sz3;
29535  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29536  if (temp_sz > 0){
29537  if (temp_sz > 2){
29538  if (temp_sz > 3){
29539  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29540  }
29541  typesz = sizeof(double) * params->buffers_size[0];
29542  typesz2 = sizeof(float) * params->buffers_size[1];
29543  typesz3 = sizeof(double) * params->buffers_size[2];
29544  }
29545  else{
29546  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29547  }
29548  }
29549  cl_int error = 0;
29550  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29551  checkError(error);
29552  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29553  checkError(error);
29554  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29555  checkError(error);
29556  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29557  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29558  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29559  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29560 
29561  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29562  checkError (error);
29563  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29564  checkError (error);
29565  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29566  checkError (error);
29567 
29568  size_t size[3] = {sz, sz2, sz3};
29569  size_t work_dimension = 3;
29570 
29571  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29572  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29573  work_dimension = 1;
29574  }
29575  else if(temp_sz > 0){
29576  if (params->multi_dimensional){
29577  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29578  For default multidimensional global work size, leave the global_work_size vector empty, \
29579  and set multi_dimensional to true. Setting the global work size based on the values inside \
29580  the global_work_size vector.");
29581  }
29582  if (temp_sz == 1){
29583  size[0] = params->global_work_size[0];
29584  work_dimension = 1;
29585  }
29586  else if (temp_sz == 2){
29587  size[0] = params->global_work_size[0];
29588  size[1] = params->global_work_size[1];
29589  work_dimension = 2;
29590  }
29591  else{
29592  size[0] = params->global_work_size[0];
29593  size[1] = params->global_work_size[1];
29594  size[2] = params->global_work_size[2];
29595  if (temp_sz > 3){
29596  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29597  }
29598  }
29599  }
29600 
29601  cl_event gpuExec;
29602 
29603  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29604 
29605  clWaitForEvents(1, &gpuExec);
29606 
29607  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29608 
29609  clWaitForEvents(1, &gpuExec);
29610 
29611  double *result = (double *) malloc(typesz);
29612  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29613 
29614  v->assign(result, result+sz);
29615 
29616  clReleaseCommandQueue (queue);
29617  clReleaseMemObject(buffer);
29618  clReleaseMemObject(buffer2);
29619  clReleaseMemObject(buffer3);
29620  clReleaseEvent(gpuExec);
29621  free(result);
29622  }
29623 
29624  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
29625  size_t sz = v->size();
29626  size_t sz2 = v2->size();
29627  size_t sz3 = v3.size();
29628  size_t typesz = sizeof(double) * sz;
29629  size_t typesz2 = sizeof(float) * sz2;
29630  size_t typesz3 = sizeof(double) * sz3;
29631  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29632  if (temp_sz > 0){
29633  if (temp_sz > 2){
29634  if (temp_sz > 3){
29635  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29636  }
29637  typesz = sizeof(double) * params->buffers_size[0];
29638  typesz2 = sizeof(float) * params->buffers_size[1];
29639  typesz3 = sizeof(double) * params->buffers_size[2];
29640  }
29641  else{
29642  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29643  }
29644  }
29645  cl_int error = 0;
29646  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29647  checkError(error);
29648  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29649  checkError(error);
29650  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29651  checkError(error);
29652  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29653  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29654  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29655  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29656 
29657  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29658  checkError (error);
29659  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29660  checkError (error);
29661  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29662  checkError (error);
29663 
29664  size_t size[3] = {sz, sz2, sz3};
29665  size_t work_dimension = 3;
29666 
29667  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29668  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29669  work_dimension = 1;
29670  }
29671  else if(temp_sz > 0){
29672  if (params->multi_dimensional){
29673  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29674  For default multidimensional global work size, leave the global_work_size vector empty, \
29675  and set multi_dimensional to true. Setting the global work size based on the values inside \
29676  the global_work_size vector.");
29677  }
29678  if (temp_sz == 1){
29679  size[0] = params->global_work_size[0];
29680  work_dimension = 1;
29681  }
29682  else if (temp_sz == 2){
29683  size[0] = params->global_work_size[0];
29684  size[1] = params->global_work_size[1];
29685  work_dimension = 2;
29686  }
29687  else{
29688  size[0] = params->global_work_size[0];
29689  size[1] = params->global_work_size[1];
29690  size[2] = params->global_work_size[2];
29691  if (temp_sz > 3){
29692  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29693  }
29694  }
29695  }
29696 
29697  cl_event gpuExec;
29698 
29699  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29700 
29701  clWaitForEvents(1, &gpuExec);
29702 
29703  double *result = (double *) malloc(typesz);
29704  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29705 
29706  v->assign(result, result+sz);
29707 
29708  if (typesz2 != typesz or sz != sz2){
29709  float *result2;
29710  result2 = (float *) malloc(typesz2);
29711  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29712 
29713  v2->assign(result2, result2+sz2);
29714  free(result2);
29715  }
29716  else{
29717  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29718 
29719  v2->assign(result, result+sz2);
29720  }
29721 
29722  clReleaseCommandQueue (queue);
29723  clReleaseMemObject(buffer);
29724  clReleaseMemObject(buffer2);
29725  clReleaseMemObject(buffer3);
29726  clReleaseEvent(gpuExec);
29727  free(result);
29728  }
29729 
29730  void ROS_OpenCL::process(std::vector<double>* v, std::vector<float>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
29731  size_t sz = v->size();
29732  size_t sz2 = v2->size();
29733  size_t sz3 = v3->size();
29734  size_t typesz = sizeof(double) * sz;
29735  size_t typesz2 = sizeof(float) * sz2;
29736  size_t typesz3 = sizeof(double) * sz3;
29737  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29738  if (temp_sz > 0){
29739  if (temp_sz > 2){
29740  if (temp_sz > 3){
29741  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29742  }
29743  typesz = sizeof(double) * params->buffers_size[0];
29744  typesz2 = sizeof(float) * params->buffers_size[1];
29745  typesz3 = sizeof(double) * params->buffers_size[2];
29746  }
29747  else{
29748  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29749  }
29750  }
29751  cl_int error = 0;
29752  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29753  checkError(error);
29754  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29755  checkError(error);
29756  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
29757  checkError(error);
29758  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29759  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29760  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29761  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29762 
29763  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29764  checkError (error);
29765  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29766  checkError (error);
29767  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
29768  checkError (error);
29769 
29770  size_t size[3] = {sz, sz2, sz3};
29771  size_t work_dimension = 3;
29772 
29773  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29774  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29775  work_dimension = 1;
29776  }
29777  else if(temp_sz > 0){
29778  if (params->multi_dimensional){
29779  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29780  For default multidimensional global work size, leave the global_work_size vector empty, \
29781  and set multi_dimensional to true. Setting the global work size based on the values inside \
29782  the global_work_size vector.");
29783  }
29784  if (temp_sz == 1){
29785  size[0] = params->global_work_size[0];
29786  work_dimension = 1;
29787  }
29788  else if (temp_sz == 2){
29789  size[0] = params->global_work_size[0];
29790  size[1] = params->global_work_size[1];
29791  work_dimension = 2;
29792  }
29793  else{
29794  size[0] = params->global_work_size[0];
29795  size[1] = params->global_work_size[1];
29796  size[2] = params->global_work_size[2];
29797  if (temp_sz > 3){
29798  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29799  }
29800  }
29801  }
29802 
29803  cl_event gpuExec;
29804 
29805  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29806 
29807  clWaitForEvents(1, &gpuExec);
29808 
29809  double *result = (double *) malloc(typesz);
29810  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29811 
29812  v->assign(result, result+sz);
29813 
29814  if (typesz2 != typesz or sz != sz2){
29815  float *result2;
29816  result2 = (float *) malloc(typesz2);
29817  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29818 
29819  v2->assign(result2, result2+sz2);
29820  free(result2);
29821  }
29822  else{
29823  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29824 
29825  v2->assign(result, result+sz2);
29826  }
29827 
29828  if (typesz3 != typesz or sz != sz3){
29829  double *result3;
29830  result3 = (double *) malloc(typesz3);
29831  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
29832 
29833  v3->assign(result3, result3+sz3);
29834  free(result3);
29835  }
29836  else{
29837  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29838 
29839  v3->assign(result, result+sz3);
29840  }
29841 
29842  clReleaseCommandQueue (queue);
29843  clReleaseMemObject(buffer);
29844  clReleaseMemObject(buffer2);
29845  clReleaseMemObject(buffer3);
29846  clReleaseEvent(gpuExec);
29847  free(result);
29848  }
29849 
29850 
29851  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
29852  size_t sz = v.size();
29853  size_t sz2 = v2.size();
29854  size_t sz3 = v3.size();
29855  size_t typesz = sizeof(double) * sz;
29856  size_t typesz2 = sizeof(double) * sz2;
29857  size_t typesz3 = sizeof(char) * sz3;
29858  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29859  if (temp_sz > 0){
29860  if (temp_sz > 2){
29861  if (temp_sz > 3){
29862  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29863  }
29864  typesz = sizeof(double) * params->buffers_size[0];
29865  typesz2 = sizeof(double) * params->buffers_size[1];
29866  typesz3 = sizeof(char) * params->buffers_size[2];
29867  }
29868  else{
29869  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29870  }
29871  }
29872  cl_int error = 0;
29873  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29874  checkError(error);
29875  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29876  checkError(error);
29877  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29878  checkError(error);
29879  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29880  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29881  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29882  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29883 
29884  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29885  checkError (error);
29886  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29887  checkError (error);
29888  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29889  checkError (error);
29890 
29891  size_t size[3] = {sz, sz2, sz3};
29892  size_t work_dimension = 3;
29893 
29894  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29895  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29896  work_dimension = 1;
29897  }
29898  else if(temp_sz > 0){
29899  if (params->multi_dimensional){
29900  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29901  For default multidimensional global work size, leave the global_work_size vector empty, \
29902  and set multi_dimensional to true. Setting the global work size based on the values inside \
29903  the global_work_size vector.");
29904  }
29905  if (temp_sz == 1){
29906  size[0] = params->global_work_size[0];
29907  work_dimension = 1;
29908  }
29909  else if (temp_sz == 2){
29910  size[0] = params->global_work_size[0];
29911  size[1] = params->global_work_size[1];
29912  work_dimension = 2;
29913  }
29914  else{
29915  size[0] = params->global_work_size[0];
29916  size[1] = params->global_work_size[1];
29917  size[2] = params->global_work_size[2];
29918  if (temp_sz > 3){
29919  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
29920  }
29921  }
29922  }
29923 
29924  cl_event gpuExec;
29925 
29926  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29927 
29928  clWaitForEvents(1, &gpuExec);
29929 
29930  double *result = (double *) malloc(typesz);
29931  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29932 
29933  std::vector<double> res = std::vector<double>();
29934  res.assign(result, result+sz);
29935 
29936  clReleaseCommandQueue (queue);
29937  clReleaseMemObject(buffer);
29938  clReleaseMemObject(buffer2);
29939  clReleaseMemObject(buffer3);
29940  clReleaseEvent(gpuExec);
29941  free(result);
29942 
29943  return res;
29944  }
29945 
29946  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<double> v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
29947  size_t sz = v->size();
29948  size_t sz2 = v2.size();
29949  size_t sz3 = v3.size();
29950  size_t typesz = sizeof(double) * sz;
29951  size_t typesz2 = sizeof(double) * sz2;
29952  size_t typesz3 = sizeof(char) * sz3;
29953  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
29954  if (temp_sz > 0){
29955  if (temp_sz > 2){
29956  if (temp_sz > 3){
29957  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29958  }
29959  typesz = sizeof(double) * params->buffers_size[0];
29960  typesz2 = sizeof(double) * params->buffers_size[1];
29961  typesz3 = sizeof(char) * params->buffers_size[2];
29962  }
29963  else{
29964  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
29965  }
29966  }
29967  cl_int error = 0;
29968  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29969  checkError(error);
29970  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29971  checkError(error);
29972  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
29973  checkError(error);
29974  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
29975  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
29976  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
29977  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
29978 
29979  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29980  checkError (error);
29981  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29982  checkError (error);
29983  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
29984  checkError (error);
29985 
29986  size_t size[3] = {sz, sz2, sz3};
29987  size_t work_dimension = 3;
29988 
29989  temp_sz = params != NULL ? params->global_work_size.size() : 0;
29990  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
29991  work_dimension = 1;
29992  }
29993  else if(temp_sz > 0){
29994  if (params->multi_dimensional){
29995  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
29996  For default multidimensional global work size, leave the global_work_size vector empty, \
29997  and set multi_dimensional to true. Setting the global work size based on the values inside \
29998  the global_work_size vector.");
29999  }
30000  if (temp_sz == 1){
30001  size[0] = params->global_work_size[0];
30002  work_dimension = 1;
30003  }
30004  else if (temp_sz == 2){
30005  size[0] = params->global_work_size[0];
30006  size[1] = params->global_work_size[1];
30007  work_dimension = 2;
30008  }
30009  else{
30010  size[0] = params->global_work_size[0];
30011  size[1] = params->global_work_size[1];
30012  size[2] = params->global_work_size[2];
30013  if (temp_sz > 3){
30014  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30015  }
30016  }
30017  }
30018 
30019  cl_event gpuExec;
30020 
30021  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30022 
30023  clWaitForEvents(1, &gpuExec);
30024 
30025  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30026 
30027  clWaitForEvents(1, &gpuExec);
30028 
30029  double *result = (double *) malloc(typesz);
30030  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30031 
30032  v->assign(result, result+sz);
30033 
30034  clReleaseCommandQueue (queue);
30035  clReleaseMemObject(buffer);
30036  clReleaseMemObject(buffer2);
30037  clReleaseMemObject(buffer3);
30038  clReleaseEvent(gpuExec);
30039  free(result);
30040  }
30041 
30042  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, const std::vector<char> v3, const ROS_OpenCL_Params* params){
30043  size_t sz = v->size();
30044  size_t sz2 = v2->size();
30045  size_t sz3 = v3.size();
30046  size_t typesz = sizeof(double) * sz;
30047  size_t typesz2 = sizeof(double) * sz2;
30048  size_t typesz3 = sizeof(char) * sz3;
30049  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30050  if (temp_sz > 0){
30051  if (temp_sz > 2){
30052  if (temp_sz > 3){
30053  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30054  }
30055  typesz = sizeof(double) * params->buffers_size[0];
30056  typesz2 = sizeof(double) * params->buffers_size[1];
30057  typesz3 = sizeof(char) * params->buffers_size[2];
30058  }
30059  else{
30060  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30061  }
30062  }
30063  cl_int error = 0;
30064  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30065  checkError(error);
30066  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30067  checkError(error);
30068  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30069  checkError(error);
30070  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30071  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30072  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30073  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30074 
30075  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30076  checkError (error);
30077  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30078  checkError (error);
30079  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30080  checkError (error);
30081 
30082  size_t size[3] = {sz, sz2, sz3};
30083  size_t work_dimension = 3;
30084 
30085  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30086  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30087  work_dimension = 1;
30088  }
30089  else if(temp_sz > 0){
30090  if (params->multi_dimensional){
30091  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30092  For default multidimensional global work size, leave the global_work_size vector empty, \
30093  and set multi_dimensional to true. Setting the global work size based on the values inside \
30094  the global_work_size vector.");
30095  }
30096  if (temp_sz == 1){
30097  size[0] = params->global_work_size[0];
30098  work_dimension = 1;
30099  }
30100  else if (temp_sz == 2){
30101  size[0] = params->global_work_size[0];
30102  size[1] = params->global_work_size[1];
30103  work_dimension = 2;
30104  }
30105  else{
30106  size[0] = params->global_work_size[0];
30107  size[1] = params->global_work_size[1];
30108  size[2] = params->global_work_size[2];
30109  if (temp_sz > 3){
30110  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30111  }
30112  }
30113  }
30114 
30115  cl_event gpuExec;
30116 
30117  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30118 
30119  clWaitForEvents(1, &gpuExec);
30120 
30121  double *result = (double *) malloc(typesz);
30122  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30123 
30124  v->assign(result, result+sz);
30125 
30126  if (typesz2 != typesz or sz != sz2){
30127  double *result2;
30128  result2 = (double *) malloc(typesz2);
30129  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30130 
30131  v2->assign(result2, result2+sz2);
30132  free(result2);
30133  }
30134  else{
30135  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30136 
30137  v2->assign(result, result+sz2);
30138  }
30139 
30140  clReleaseCommandQueue (queue);
30141  clReleaseMemObject(buffer);
30142  clReleaseMemObject(buffer2);
30143  clReleaseMemObject(buffer3);
30144  clReleaseEvent(gpuExec);
30145  free(result);
30146  }
30147 
30148  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, std::vector<char>* v3, const ROS_OpenCL_Params* params){
30149  size_t sz = v->size();
30150  size_t sz2 = v2->size();
30151  size_t sz3 = v3->size();
30152  size_t typesz = sizeof(double) * sz;
30153  size_t typesz2 = sizeof(double) * sz2;
30154  size_t typesz3 = sizeof(char) * sz3;
30155  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30156  if (temp_sz > 0){
30157  if (temp_sz > 2){
30158  if (temp_sz > 3){
30159  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30160  }
30161  typesz = sizeof(double) * params->buffers_size[0];
30162  typesz2 = sizeof(double) * params->buffers_size[1];
30163  typesz3 = sizeof(char) * params->buffers_size[2];
30164  }
30165  else{
30166  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30167  }
30168  }
30169  cl_int error = 0;
30170  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30171  checkError(error);
30172  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30173  checkError(error);
30174  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
30175  checkError(error);
30176  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30177  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30178  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30179  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30180 
30181  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30182  checkError (error);
30183  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30184  checkError (error);
30185  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
30186  checkError (error);
30187 
30188  size_t size[3] = {sz, sz2, sz3};
30189  size_t work_dimension = 3;
30190 
30191  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30192  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30193  work_dimension = 1;
30194  }
30195  else if(temp_sz > 0){
30196  if (params->multi_dimensional){
30197  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30198  For default multidimensional global work size, leave the global_work_size vector empty, \
30199  and set multi_dimensional to true. Setting the global work size based on the values inside \
30200  the global_work_size vector.");
30201  }
30202  if (temp_sz == 1){
30203  size[0] = params->global_work_size[0];
30204  work_dimension = 1;
30205  }
30206  else if (temp_sz == 2){
30207  size[0] = params->global_work_size[0];
30208  size[1] = params->global_work_size[1];
30209  work_dimension = 2;
30210  }
30211  else{
30212  size[0] = params->global_work_size[0];
30213  size[1] = params->global_work_size[1];
30214  size[2] = params->global_work_size[2];
30215  if (temp_sz > 3){
30216  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30217  }
30218  }
30219  }
30220 
30221  cl_event gpuExec;
30222 
30223  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30224 
30225  clWaitForEvents(1, &gpuExec);
30226 
30227  double *result = (double *) malloc(typesz);
30228  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30229 
30230  v->assign(result, result+sz);
30231 
30232  if (typesz2 != typesz or sz != sz2){
30233  double *result2;
30234  result2 = (double *) malloc(typesz2);
30235  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30236 
30237  v2->assign(result2, result2+sz2);
30238  free(result2);
30239  }
30240  else{
30241  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30242 
30243  v2->assign(result, result+sz2);
30244  }
30245 
30246  if (typesz3 != typesz or sz != sz3){
30247  char *result3;
30248  result3 = (char *) malloc(typesz3);
30249  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
30250 
30251  v3->assign(result3, result3+sz3);
30252  free(result3);
30253  }
30254  else{
30255  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30256 
30257  v3->assign(result, result+sz3);
30258  }
30259 
30260  clReleaseCommandQueue (queue);
30261  clReleaseMemObject(buffer);
30262  clReleaseMemObject(buffer2);
30263  clReleaseMemObject(buffer3);
30264  clReleaseEvent(gpuExec);
30265  free(result);
30266  }
30267 
30268 
30269  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
30270  size_t sz = v.size();
30271  size_t sz2 = v2.size();
30272  size_t sz3 = v3.size();
30273  size_t typesz = sizeof(double) * sz;
30274  size_t typesz2 = sizeof(double) * sz2;
30275  size_t typesz3 = sizeof(int) * sz3;
30276  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30277  if (temp_sz > 0){
30278  if (temp_sz > 2){
30279  if (temp_sz > 3){
30280  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30281  }
30282  typesz = sizeof(double) * params->buffers_size[0];
30283  typesz2 = sizeof(double) * params->buffers_size[1];
30284  typesz3 = sizeof(int) * params->buffers_size[2];
30285  }
30286  else{
30287  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30288  }
30289  }
30290  cl_int error = 0;
30291  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30292  checkError(error);
30293  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30294  checkError(error);
30295  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30296  checkError(error);
30297  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30298  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30299  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30300  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30301 
30302  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
30303  checkError (error);
30304  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30305  checkError (error);
30306  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30307  checkError (error);
30308 
30309  size_t size[3] = {sz, sz2, sz3};
30310  size_t work_dimension = 3;
30311 
30312  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30313  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30314  work_dimension = 1;
30315  }
30316  else if(temp_sz > 0){
30317  if (params->multi_dimensional){
30318  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30319  For default multidimensional global work size, leave the global_work_size vector empty, \
30320  and set multi_dimensional to true. Setting the global work size based on the values inside \
30321  the global_work_size vector.");
30322  }
30323  if (temp_sz == 1){
30324  size[0] = params->global_work_size[0];
30325  work_dimension = 1;
30326  }
30327  else if (temp_sz == 2){
30328  size[0] = params->global_work_size[0];
30329  size[1] = params->global_work_size[1];
30330  work_dimension = 2;
30331  }
30332  else{
30333  size[0] = params->global_work_size[0];
30334  size[1] = params->global_work_size[1];
30335  size[2] = params->global_work_size[2];
30336  if (temp_sz > 3){
30337  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30338  }
30339  }
30340  }
30341 
30342  cl_event gpuExec;
30343 
30344  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30345 
30346  clWaitForEvents(1, &gpuExec);
30347 
30348  double *result = (double *) malloc(typesz);
30349  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30350 
30351  std::vector<double> res = std::vector<double>();
30352  res.assign(result, result+sz);
30353 
30354  clReleaseCommandQueue (queue);
30355  clReleaseMemObject(buffer);
30356  clReleaseMemObject(buffer2);
30357  clReleaseMemObject(buffer3);
30358  clReleaseEvent(gpuExec);
30359  free(result);
30360 
30361  return res;
30362  }
30363 
30364  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<double> v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
30365  size_t sz = v->size();
30366  size_t sz2 = v2.size();
30367  size_t sz3 = v3.size();
30368  size_t typesz = sizeof(double) * sz;
30369  size_t typesz2 = sizeof(double) * sz2;
30370  size_t typesz3 = sizeof(int) * sz3;
30371  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30372  if (temp_sz > 0){
30373  if (temp_sz > 2){
30374  if (temp_sz > 3){
30375  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30376  }
30377  typesz = sizeof(double) * params->buffers_size[0];
30378  typesz2 = sizeof(double) * params->buffers_size[1];
30379  typesz3 = sizeof(int) * params->buffers_size[2];
30380  }
30381  else{
30382  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30383  }
30384  }
30385  cl_int error = 0;
30386  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30387  checkError(error);
30388  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30389  checkError(error);
30390  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30391  checkError(error);
30392  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30393  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30394  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30395  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30396 
30397  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30398  checkError (error);
30399  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30400  checkError (error);
30401  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30402  checkError (error);
30403 
30404  size_t size[3] = {sz, sz2, sz3};
30405  size_t work_dimension = 3;
30406 
30407  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30408  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30409  work_dimension = 1;
30410  }
30411  else if(temp_sz > 0){
30412  if (params->multi_dimensional){
30413  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30414  For default multidimensional global work size, leave the global_work_size vector empty, \
30415  and set multi_dimensional to true. Setting the global work size based on the values inside \
30416  the global_work_size vector.");
30417  }
30418  if (temp_sz == 1){
30419  size[0] = params->global_work_size[0];
30420  work_dimension = 1;
30421  }
30422  else if (temp_sz == 2){
30423  size[0] = params->global_work_size[0];
30424  size[1] = params->global_work_size[1];
30425  work_dimension = 2;
30426  }
30427  else{
30428  size[0] = params->global_work_size[0];
30429  size[1] = params->global_work_size[1];
30430  size[2] = params->global_work_size[2];
30431  if (temp_sz > 3){
30432  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30433  }
30434  }
30435  }
30436 
30437  cl_event gpuExec;
30438 
30439  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30440 
30441  clWaitForEvents(1, &gpuExec);
30442 
30443  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30444 
30445  clWaitForEvents(1, &gpuExec);
30446 
30447  double *result = (double *) malloc(typesz);
30448  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30449 
30450  v->assign(result, result+sz);
30451 
30452  clReleaseCommandQueue (queue);
30453  clReleaseMemObject(buffer);
30454  clReleaseMemObject(buffer2);
30455  clReleaseMemObject(buffer3);
30456  clReleaseEvent(gpuExec);
30457  free(result);
30458  }
30459 
30460  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, const std::vector<int> v3, const ROS_OpenCL_Params* params){
30461  size_t sz = v->size();
30462  size_t sz2 = v2->size();
30463  size_t sz3 = v3.size();
30464  size_t typesz = sizeof(double) * sz;
30465  size_t typesz2 = sizeof(double) * sz2;
30466  size_t typesz3 = sizeof(int) * sz3;
30467  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30468  if (temp_sz > 0){
30469  if (temp_sz > 2){
30470  if (temp_sz > 3){
30471  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30472  }
30473  typesz = sizeof(double) * params->buffers_size[0];
30474  typesz2 = sizeof(double) * params->buffers_size[1];
30475  typesz3 = sizeof(int) * params->buffers_size[2];
30476  }
30477  else{
30478  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30479  }
30480  }
30481  cl_int error = 0;
30482  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30483  checkError(error);
30484  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30485  checkError(error);
30486  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30487  checkError(error);
30488  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30489  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30490  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30491  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30492 
30493  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30494  checkError (error);
30495  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30496  checkError (error);
30497  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30498  checkError (error);
30499 
30500  size_t size[3] = {sz, sz2, sz3};
30501  size_t work_dimension = 3;
30502 
30503  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30504  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30505  work_dimension = 1;
30506  }
30507  else if(temp_sz > 0){
30508  if (params->multi_dimensional){
30509  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30510  For default multidimensional global work size, leave the global_work_size vector empty, \
30511  and set multi_dimensional to true. Setting the global work size based on the values inside \
30512  the global_work_size vector.");
30513  }
30514  if (temp_sz == 1){
30515  size[0] = params->global_work_size[0];
30516  work_dimension = 1;
30517  }
30518  else if (temp_sz == 2){
30519  size[0] = params->global_work_size[0];
30520  size[1] = params->global_work_size[1];
30521  work_dimension = 2;
30522  }
30523  else{
30524  size[0] = params->global_work_size[0];
30525  size[1] = params->global_work_size[1];
30526  size[2] = params->global_work_size[2];
30527  if (temp_sz > 3){
30528  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30529  }
30530  }
30531  }
30532 
30533  cl_event gpuExec;
30534 
30535  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30536 
30537  clWaitForEvents(1, &gpuExec);
30538 
30539  double *result = (double *) malloc(typesz);
30540  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30541 
30542  v->assign(result, result+sz);
30543 
30544  if (typesz2 != typesz or sz != sz2){
30545  double *result2;
30546  result2 = (double *) malloc(typesz2);
30547  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30548 
30549  v2->assign(result2, result2+sz2);
30550  free(result2);
30551  }
30552  else{
30553  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30554 
30555  v2->assign(result, result+sz2);
30556  }
30557 
30558  clReleaseCommandQueue (queue);
30559  clReleaseMemObject(buffer);
30560  clReleaseMemObject(buffer2);
30561  clReleaseMemObject(buffer3);
30562  clReleaseEvent(gpuExec);
30563  free(result);
30564  }
30565 
30566  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, std::vector<int>* v3, const ROS_OpenCL_Params* params){
30567  size_t sz = v->size();
30568  size_t sz2 = v2->size();
30569  size_t sz3 = v3->size();
30570  size_t typesz = sizeof(double) * sz;
30571  size_t typesz2 = sizeof(double) * sz2;
30572  size_t typesz3 = sizeof(int) * sz3;
30573  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30574  if (temp_sz > 0){
30575  if (temp_sz > 2){
30576  if (temp_sz > 3){
30577  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30578  }
30579  typesz = sizeof(double) * params->buffers_size[0];
30580  typesz2 = sizeof(double) * params->buffers_size[1];
30581  typesz3 = sizeof(int) * params->buffers_size[2];
30582  }
30583  else{
30584  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30585  }
30586  }
30587  cl_int error = 0;
30588  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30589  checkError(error);
30590  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30591  checkError(error);
30592  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
30593  checkError(error);
30594  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30595  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30596  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30597  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30598 
30599  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30600  checkError (error);
30601  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30602  checkError (error);
30603  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
30604  checkError (error);
30605 
30606  size_t size[3] = {sz, sz2, sz3};
30607  size_t work_dimension = 3;
30608 
30609  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30610  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30611  work_dimension = 1;
30612  }
30613  else if(temp_sz > 0){
30614  if (params->multi_dimensional){
30615  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30616  For default multidimensional global work size, leave the global_work_size vector empty, \
30617  and set multi_dimensional to true. Setting the global work size based on the values inside \
30618  the global_work_size vector.");
30619  }
30620  if (temp_sz == 1){
30621  size[0] = params->global_work_size[0];
30622  work_dimension = 1;
30623  }
30624  else if (temp_sz == 2){
30625  size[0] = params->global_work_size[0];
30626  size[1] = params->global_work_size[1];
30627  work_dimension = 2;
30628  }
30629  else{
30630  size[0] = params->global_work_size[0];
30631  size[1] = params->global_work_size[1];
30632  size[2] = params->global_work_size[2];
30633  if (temp_sz > 3){
30634  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30635  }
30636  }
30637  }
30638 
30639  cl_event gpuExec;
30640 
30641  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30642 
30643  clWaitForEvents(1, &gpuExec);
30644 
30645  double *result = (double *) malloc(typesz);
30646  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30647 
30648  v->assign(result, result+sz);
30649 
30650  if (typesz2 != typesz or sz != sz2){
30651  double *result2;
30652  result2 = (double *) malloc(typesz2);
30653  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30654 
30655  v2->assign(result2, result2+sz2);
30656  free(result2);
30657  }
30658  else{
30659  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30660 
30661  v2->assign(result, result+sz2);
30662  }
30663 
30664  if (typesz3 != typesz or sz != sz3){
30665  int *result3;
30666  result3 = (int *) malloc(typesz3);
30667  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
30668 
30669  v3->assign(result3, result3+sz3);
30670  free(result3);
30671  }
30672  else{
30673  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30674 
30675  v3->assign(result, result+sz3);
30676  }
30677 
30678  clReleaseCommandQueue (queue);
30679  clReleaseMemObject(buffer);
30680  clReleaseMemObject(buffer2);
30681  clReleaseMemObject(buffer3);
30682  clReleaseEvent(gpuExec);
30683  free(result);
30684  }
30685 
30686 
30687  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
30688  size_t sz = v.size();
30689  size_t sz2 = v2.size();
30690  size_t sz3 = v3.size();
30691  size_t typesz = sizeof(double) * sz;
30692  size_t typesz2 = sizeof(double) * sz2;
30693  size_t typesz3 = sizeof(float) * sz3;
30694  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30695  if (temp_sz > 0){
30696  if (temp_sz > 2){
30697  if (temp_sz > 3){
30698  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30699  }
30700  typesz = sizeof(double) * params->buffers_size[0];
30701  typesz2 = sizeof(double) * params->buffers_size[1];
30702  typesz3 = sizeof(float) * params->buffers_size[2];
30703  }
30704  else{
30705  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30706  }
30707  }
30708  cl_int error = 0;
30709  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30710  checkError(error);
30711  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30712  checkError(error);
30713  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30714  checkError(error);
30715  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30716  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30717  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30718  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30719 
30720  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
30721  checkError (error);
30722  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30723  checkError (error);
30724  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30725  checkError (error);
30726 
30727  size_t size[3] = {sz, sz2, sz3};
30728  size_t work_dimension = 3;
30729 
30730  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30731  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30732  work_dimension = 1;
30733  }
30734  else if(temp_sz > 0){
30735  if (params->multi_dimensional){
30736  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30737  For default multidimensional global work size, leave the global_work_size vector empty, \
30738  and set multi_dimensional to true. Setting the global work size based on the values inside \
30739  the global_work_size vector.");
30740  }
30741  if (temp_sz == 1){
30742  size[0] = params->global_work_size[0];
30743  work_dimension = 1;
30744  }
30745  else if (temp_sz == 2){
30746  size[0] = params->global_work_size[0];
30747  size[1] = params->global_work_size[1];
30748  work_dimension = 2;
30749  }
30750  else{
30751  size[0] = params->global_work_size[0];
30752  size[1] = params->global_work_size[1];
30753  size[2] = params->global_work_size[2];
30754  if (temp_sz > 3){
30755  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30756  }
30757  }
30758  }
30759 
30760  cl_event gpuExec;
30761 
30762  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30763 
30764  clWaitForEvents(1, &gpuExec);
30765 
30766  double *result = (double *) malloc(typesz);
30767  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30768 
30769  std::vector<double> res = std::vector<double>();
30770  res.assign(result, result+sz);
30771 
30772  clReleaseCommandQueue (queue);
30773  clReleaseMemObject(buffer);
30774  clReleaseMemObject(buffer2);
30775  clReleaseMemObject(buffer3);
30776  clReleaseEvent(gpuExec);
30777  free(result);
30778 
30779  return res;
30780  }
30781 
30782  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<double> v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
30783  size_t sz = v->size();
30784  size_t sz2 = v2.size();
30785  size_t sz3 = v3.size();
30786  size_t typesz = sizeof(double) * sz;
30787  size_t typesz2 = sizeof(double) * sz2;
30788  size_t typesz3 = sizeof(float) * sz3;
30789  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30790  if (temp_sz > 0){
30791  if (temp_sz > 2){
30792  if (temp_sz > 3){
30793  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30794  }
30795  typesz = sizeof(double) * params->buffers_size[0];
30796  typesz2 = sizeof(double) * params->buffers_size[1];
30797  typesz3 = sizeof(float) * params->buffers_size[2];
30798  }
30799  else{
30800  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30801  }
30802  }
30803  cl_int error = 0;
30804  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30805  checkError(error);
30806  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30807  checkError(error);
30808  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30809  checkError(error);
30810  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30811  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30812  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30813  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30814 
30815  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30816  checkError (error);
30817  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30818  checkError (error);
30819  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30820  checkError (error);
30821 
30822  size_t size[3] = {sz, sz2, sz3};
30823  size_t work_dimension = 3;
30824 
30825  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30826  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30827  work_dimension = 1;
30828  }
30829  else if(temp_sz > 0){
30830  if (params->multi_dimensional){
30831  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30832  For default multidimensional global work size, leave the global_work_size vector empty, \
30833  and set multi_dimensional to true. Setting the global work size based on the values inside \
30834  the global_work_size vector.");
30835  }
30836  if (temp_sz == 1){
30837  size[0] = params->global_work_size[0];
30838  work_dimension = 1;
30839  }
30840  else if (temp_sz == 2){
30841  size[0] = params->global_work_size[0];
30842  size[1] = params->global_work_size[1];
30843  work_dimension = 2;
30844  }
30845  else{
30846  size[0] = params->global_work_size[0];
30847  size[1] = params->global_work_size[1];
30848  size[2] = params->global_work_size[2];
30849  if (temp_sz > 3){
30850  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30851  }
30852  }
30853  }
30854 
30855  cl_event gpuExec;
30856 
30857  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30858 
30859  clWaitForEvents(1, &gpuExec);
30860 
30861  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30862 
30863  clWaitForEvents(1, &gpuExec);
30864 
30865  double *result = (double *) malloc(typesz);
30866  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30867 
30868  v->assign(result, result+sz);
30869 
30870  clReleaseCommandQueue (queue);
30871  clReleaseMemObject(buffer);
30872  clReleaseMemObject(buffer2);
30873  clReleaseMemObject(buffer3);
30874  clReleaseEvent(gpuExec);
30875  free(result);
30876  }
30877 
30878  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, const std::vector<float> v3, const ROS_OpenCL_Params* params){
30879  size_t sz = v->size();
30880  size_t sz2 = v2->size();
30881  size_t sz3 = v3.size();
30882  size_t typesz = sizeof(double) * sz;
30883  size_t typesz2 = sizeof(double) * sz2;
30884  size_t typesz3 = sizeof(float) * sz3;
30885  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30886  if (temp_sz > 0){
30887  if (temp_sz > 2){
30888  if (temp_sz > 3){
30889  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30890  }
30891  typesz = sizeof(double) * params->buffers_size[0];
30892  typesz2 = sizeof(double) * params->buffers_size[1];
30893  typesz3 = sizeof(float) * params->buffers_size[2];
30894  }
30895  else{
30896  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30897  }
30898  }
30899  cl_int error = 0;
30900  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30901  checkError(error);
30902  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30903  checkError(error);
30904  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30905  checkError(error);
30906  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
30907  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
30908  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
30909  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
30910 
30911  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30912  checkError (error);
30913  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30914  checkError (error);
30915  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30916  checkError (error);
30917 
30918  size_t size[3] = {sz, sz2, sz3};
30919  size_t work_dimension = 3;
30920 
30921  temp_sz = params != NULL ? params->global_work_size.size() : 0;
30922  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
30923  work_dimension = 1;
30924  }
30925  else if(temp_sz > 0){
30926  if (params->multi_dimensional){
30927  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
30928  For default multidimensional global work size, leave the global_work_size vector empty, \
30929  and set multi_dimensional to true. Setting the global work size based on the values inside \
30930  the global_work_size vector.");
30931  }
30932  if (temp_sz == 1){
30933  size[0] = params->global_work_size[0];
30934  work_dimension = 1;
30935  }
30936  else if (temp_sz == 2){
30937  size[0] = params->global_work_size[0];
30938  size[1] = params->global_work_size[1];
30939  work_dimension = 2;
30940  }
30941  else{
30942  size[0] = params->global_work_size[0];
30943  size[1] = params->global_work_size[1];
30944  size[2] = params->global_work_size[2];
30945  if (temp_sz > 3){
30946  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30947  }
30948  }
30949  }
30950 
30951  cl_event gpuExec;
30952 
30953  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30954 
30955  clWaitForEvents(1, &gpuExec);
30956 
30957  double *result = (double *) malloc(typesz);
30958  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30959 
30960  v->assign(result, result+sz);
30961 
30962  if (typesz2 != typesz or sz != sz2){
30963  double *result2;
30964  result2 = (double *) malloc(typesz2);
30965  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30966 
30967  v2->assign(result2, result2+sz2);
30968  free(result2);
30969  }
30970  else{
30971  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30972 
30973  v2->assign(result, result+sz2);
30974  }
30975 
30976  clReleaseCommandQueue (queue);
30977  clReleaseMemObject(buffer);
30978  clReleaseMemObject(buffer2);
30979  clReleaseMemObject(buffer3);
30980  clReleaseEvent(gpuExec);
30981  free(result);
30982  }
30983 
30984  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, std::vector<float>* v3, const ROS_OpenCL_Params* params){
30985  size_t sz = v->size();
30986  size_t sz2 = v2->size();
30987  size_t sz3 = v3->size();
30988  size_t typesz = sizeof(double) * sz;
30989  size_t typesz2 = sizeof(double) * sz2;
30990  size_t typesz3 = sizeof(float) * sz3;
30991  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
30992  if (temp_sz > 0){
30993  if (temp_sz > 2){
30994  if (temp_sz > 3){
30995  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30996  }
30997  typesz = sizeof(double) * params->buffers_size[0];
30998  typesz2 = sizeof(double) * params->buffers_size[1];
30999  typesz3 = sizeof(float) * params->buffers_size[2];
31000  }
31001  else{
31002  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31003  }
31004  }
31005  cl_int error = 0;
31006  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31007  checkError(error);
31008  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31009  checkError(error);
31010  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
31011  checkError(error);
31012  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
31013  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
31014  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
31015  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
31016 
31017  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31018  checkError (error);
31019  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31020  checkError (error);
31021  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
31022  checkError (error);
31023 
31024  size_t size[3] = {sz, sz2, sz3};
31025  size_t work_dimension = 3;
31026 
31027  temp_sz = params != NULL ? params->global_work_size.size() : 0;
31028  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
31029  work_dimension = 1;
31030  }
31031  else if(temp_sz > 0){
31032  if (params->multi_dimensional){
31033  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
31034  For default multidimensional global work size, leave the global_work_size vector empty, \
31035  and set multi_dimensional to true. Setting the global work size based on the values inside \
31036  the global_work_size vector.");
31037  }
31038  if (temp_sz == 1){
31039  size[0] = params->global_work_size[0];
31040  work_dimension = 1;
31041  }
31042  else if (temp_sz == 2){
31043  size[0] = params->global_work_size[0];
31044  size[1] = params->global_work_size[1];
31045  work_dimension = 2;
31046  }
31047  else{
31048  size[0] = params->global_work_size[0];
31049  size[1] = params->global_work_size[1];
31050  size[2] = params->global_work_size[2];
31051  if (temp_sz > 3){
31052  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31053  }
31054  }
31055  }
31056 
31057  cl_event gpuExec;
31058 
31059  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31060 
31061  clWaitForEvents(1, &gpuExec);
31062 
31063  double *result = (double *) malloc(typesz);
31064  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31065 
31066  v->assign(result, result+sz);
31067 
31068  if (typesz2 != typesz or sz != sz2){
31069  double *result2;
31070  result2 = (double *) malloc(typesz2);
31071  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31072 
31073  v2->assign(result2, result2+sz2);
31074  free(result2);
31075  }
31076  else{
31077  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31078 
31079  v2->assign(result, result+sz2);
31080  }
31081 
31082  if (typesz3 != typesz or sz != sz3){
31083  float *result3;
31084  result3 = (float *) malloc(typesz3);
31085  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
31086 
31087  v3->assign(result3, result3+sz3);
31088  free(result3);
31089  }
31090  else{
31091  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31092 
31093  v3->assign(result, result+sz3);
31094  }
31095 
31096  clReleaseCommandQueue (queue);
31097  clReleaseMemObject(buffer);
31098  clReleaseMemObject(buffer2);
31099  clReleaseMemObject(buffer3);
31100  clReleaseEvent(gpuExec);
31101  free(result);
31102  }
31103 
31104 
31105  std::vector<double> ROS_OpenCL::process(const std::vector<double> v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
31106  size_t sz = v.size();
31107  size_t sz2 = v2.size();
31108  size_t sz3 = v3.size();
31109  size_t typesz = sizeof(double) * sz;
31110  size_t typesz2 = sizeof(double) * sz2;
31111  size_t typesz3 = sizeof(double) * sz3;
31112  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
31113  if (temp_sz > 0){
31114  if (temp_sz > 2){
31115  if (temp_sz > 3){
31116  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31117  }
31118  typesz = sizeof(double) * params->buffers_size[0];
31119  typesz2 = sizeof(double) * params->buffers_size[1];
31120  typesz3 = sizeof(double) * params->buffers_size[2];
31121  }
31122  else{
31123  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31124  }
31125  }
31126  cl_int error = 0;
31127  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31128  checkError(error);
31129  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
31130  checkError(error);
31131  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31132  checkError(error);
31133  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
31134  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
31135  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
31136  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
31137 
31138  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
31139  checkError (error);
31140  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
31141  checkError (error);
31142  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31143  checkError (error);
31144 
31145  size_t size[3] = {sz, sz2, sz3};
31146  size_t work_dimension = 3;
31147 
31148  temp_sz = params != NULL ? params->global_work_size.size() : 0;
31149  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
31150  work_dimension = 1;
31151  }
31152  else if(temp_sz > 0){
31153  if (params->multi_dimensional){
31154  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
31155  For default multidimensional global work size, leave the global_work_size vector empty, \
31156  and set multi_dimensional to true. Setting the global work size based on the values inside \
31157  the global_work_size vector.");
31158  }
31159  if (temp_sz == 1){
31160  size[0] = params->global_work_size[0];
31161  work_dimension = 1;
31162  }
31163  else if (temp_sz == 2){
31164  size[0] = params->global_work_size[0];
31165  size[1] = params->global_work_size[1];
31166  work_dimension = 2;
31167  }
31168  else{
31169  size[0] = params->global_work_size[0];
31170  size[1] = params->global_work_size[1];
31171  size[2] = params->global_work_size[2];
31172  if (temp_sz > 3){
31173  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31174  }
31175  }
31176  }
31177 
31178  cl_event gpuExec;
31179 
31180  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31181 
31182  clWaitForEvents(1, &gpuExec);
31183 
31184  double *result = (double *) malloc(typesz);
31185  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31186 
31187  std::vector<double> res = std::vector<double>();
31188  res.assign(result, result+sz);
31189 
31190  clReleaseCommandQueue (queue);
31191  clReleaseMemObject(buffer);
31192  clReleaseMemObject(buffer2);
31193  clReleaseMemObject(buffer3);
31194  clReleaseEvent(gpuExec);
31195  free(result);
31196 
31197  return res;
31198  }
31199 
31200  void ROS_OpenCL::process(std::vector<double>* v, const std::vector<double> v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
31201  size_t sz = v->size();
31202  size_t sz2 = v2.size();
31203  size_t sz3 = v3.size();
31204  size_t typesz = sizeof(double) * sz;
31205  size_t typesz2 = sizeof(double) * sz2;
31206  size_t typesz3 = sizeof(double) * sz3;
31207  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
31208  if (temp_sz > 0){
31209  if (temp_sz > 2){
31210  if (temp_sz > 3){
31211  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31212  }
31213  typesz = sizeof(double) * params->buffers_size[0];
31214  typesz2 = sizeof(double) * params->buffers_size[1];
31215  typesz3 = sizeof(double) * params->buffers_size[2];
31216  }
31217  else{
31218  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31219  }
31220  }
31221  cl_int error = 0;
31222  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31223  checkError(error);
31224  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
31225  checkError(error);
31226  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31227  checkError(error);
31228  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
31229  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
31230  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
31231  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
31232 
31233  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31234  checkError (error);
31235  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
31236  checkError (error);
31237  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31238  checkError (error);
31239 
31240  size_t size[3] = {sz, sz2, sz3};
31241  size_t work_dimension = 3;
31242 
31243  temp_sz = params != NULL ? params->global_work_size.size() : 0;
31244  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
31245  work_dimension = 1;
31246  }
31247  else if(temp_sz > 0){
31248  if (params->multi_dimensional){
31249  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
31250  For default multidimensional global work size, leave the global_work_size vector empty, \
31251  and set multi_dimensional to true. Setting the global work size based on the values inside \
31252  the global_work_size vector.");
31253  }
31254  if (temp_sz == 1){
31255  size[0] = params->global_work_size[0];
31256  work_dimension = 1;
31257  }
31258  else if (temp_sz == 2){
31259  size[0] = params->global_work_size[0];
31260  size[1] = params->global_work_size[1];
31261  work_dimension = 2;
31262  }
31263  else{
31264  size[0] = params->global_work_size[0];
31265  size[1] = params->global_work_size[1];
31266  size[2] = params->global_work_size[2];
31267  if (temp_sz > 3){
31268  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31269  }
31270  }
31271  }
31272 
31273  cl_event gpuExec;
31274 
31275  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31276 
31277  clWaitForEvents(1, &gpuExec);
31278 
31279  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31280 
31281  clWaitForEvents(1, &gpuExec);
31282 
31283  double *result = (double *) malloc(typesz);
31284  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31285 
31286  v->assign(result, result+sz);
31287 
31288  clReleaseCommandQueue (queue);
31289  clReleaseMemObject(buffer);
31290  clReleaseMemObject(buffer2);
31291  clReleaseMemObject(buffer3);
31292  clReleaseEvent(gpuExec);
31293  free(result);
31294  }
31295 
31296  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, const std::vector<double> v3, const ROS_OpenCL_Params* params){
31297  size_t sz = v->size();
31298  size_t sz2 = v2->size();
31299  size_t sz3 = v3.size();
31300  size_t typesz = sizeof(double) * sz;
31301  size_t typesz2 = sizeof(double) * sz2;
31302  size_t typesz3 = sizeof(double) * sz3;
31303  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
31304  if (temp_sz > 0){
31305  if (temp_sz > 2){
31306  if (temp_sz > 3){
31307  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31308  }
31309  typesz = sizeof(double) * params->buffers_size[0];
31310  typesz2 = sizeof(double) * params->buffers_size[1];
31311  typesz3 = sizeof(double) * params->buffers_size[2];
31312  }
31313  else{
31314  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31315  }
31316  }
31317  cl_int error = 0;
31318  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31319  checkError(error);
31320  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31321  checkError(error);
31322  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31323  checkError(error);
31324  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
31325  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
31326  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
31327  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
31328 
31329  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31330  checkError (error);
31331  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31332  checkError (error);
31333  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31334  checkError (error);
31335 
31336  size_t size[3] = {sz, sz2, sz3};
31337  size_t work_dimension = 3;
31338 
31339  temp_sz = params != NULL ? params->global_work_size.size() : 0;
31340  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
31341  work_dimension = 1;
31342  }
31343  else if(temp_sz > 0){
31344  if (params->multi_dimensional){
31345  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
31346  For default multidimensional global work size, leave the global_work_size vector empty, \
31347  and set multi_dimensional to true. Setting the global work size based on the values inside \
31348  the global_work_size vector.");
31349  }
31350  if (temp_sz == 1){
31351  size[0] = params->global_work_size[0];
31352  work_dimension = 1;
31353  }
31354  else if (temp_sz == 2){
31355  size[0] = params->global_work_size[0];
31356  size[1] = params->global_work_size[1];
31357  work_dimension = 2;
31358  }
31359  else{
31360  size[0] = params->global_work_size[0];
31361  size[1] = params->global_work_size[1];
31362  size[2] = params->global_work_size[2];
31363  if (temp_sz > 3){
31364  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31365  }
31366  }
31367  }
31368 
31369  cl_event gpuExec;
31370 
31371  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31372 
31373  clWaitForEvents(1, &gpuExec);
31374 
31375  double *result = (double *) malloc(typesz);
31376  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31377 
31378  v->assign(result, result+sz);
31379 
31380  if (typesz2 != typesz or sz != sz2){
31381  double *result2;
31382  result2 = (double *) malloc(typesz2);
31383  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31384 
31385  v2->assign(result2, result2+sz2);
31386  free(result2);
31387  }
31388  else{
31389  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31390 
31391  v2->assign(result, result+sz2);
31392  }
31393 
31394  clReleaseCommandQueue (queue);
31395  clReleaseMemObject(buffer);
31396  clReleaseMemObject(buffer2);
31397  clReleaseMemObject(buffer3);
31398  clReleaseEvent(gpuExec);
31399  free(result);
31400  }
31401 
31402  void ROS_OpenCL::process(std::vector<double>* v, std::vector<double>* v2, std::vector<double>* v3, const ROS_OpenCL_Params* params){
31403  size_t sz = v->size();
31404  size_t sz2 = v2->size();
31405  size_t sz3 = v3->size();
31406  size_t typesz = sizeof(double) * sz;
31407  size_t typesz2 = sizeof(double) * sz2;
31408  size_t typesz3 = sizeof(double) * sz3;
31409  size_t temp_sz = params != NULL ? params->buffers_size.size() : 0;
31410  if (temp_sz > 0){
31411  if (temp_sz > 2){
31412  if (temp_sz > 3){
31413  ROS_WARN("buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31414  }
31415  typesz = sizeof(double) * params->buffers_size[0];
31416  typesz2 = sizeof(double) * params->buffers_size[1];
31417  typesz3 = sizeof(double) * params->buffers_size[2];
31418  }
31419  else{
31420  ROS_WARN("buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31421  }
31422  }
31423  cl_int error = 0;
31424  cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31425  checkError(error);
31426  cl_mem buffer2 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31427  checkError(error);
31428  cl_mem buffer3 = clCreateBuffer(context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
31429  checkError(error);
31430  clSetKernelArg (kernel, 0, sizeof (cl_mem), &buffer);
31431  clSetKernelArg (kernel, 1, sizeof (cl_mem), &buffer2);
31432  clSetKernelArg (kernel, 2, sizeof (cl_mem), &buffer3);
31433  cl_command_queue queue = clCreateCommandQueueWithProperties (context, deviceIds [0], NULL, &error);
31434 
31435  clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31436  checkError (error);
31437  clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31438  checkError (error);
31439  clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
31440  checkError (error);
31441 
31442  size_t size[3] = {sz, sz2, sz3};
31443  size_t work_dimension = 3;
31444 
31445  temp_sz = params != NULL ? params->global_work_size.size() : 0;
31446  if (params == NULL or (params != NULL and not(params->multi_dimensional or temp_sz > 0))){
31447  work_dimension = 1;
31448  }
31449  else if(temp_sz > 0){
31450  if (params->multi_dimensional){
31451  ROS_WARN("multi_dimensional should be set to true without pushing to global_work_size. \
31452  For default multidimensional global work size, leave the global_work_size vector empty, \
31453  and set multi_dimensional to true. Setting the global work size based on the values inside \
31454  the global_work_size vector.");
31455  }
31456  if (temp_sz == 1){
31457  size[0] = params->global_work_size[0];
31458  work_dimension = 1;
31459  }
31460  else if (temp_sz == 2){
31461  size[0] = params->global_work_size[0];
31462  size[1] = params->global_work_size[1];
31463  work_dimension = 2;
31464  }
31465  else{
31466  size[0] = params->global_work_size[0];
31467  size[1] = params->global_work_size[1];
31468  size[2] = params->global_work_size[2];
31469  if (temp_sz > 3){
31470  ROS_WARN("global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31471  }
31472  }
31473  }
31474 
31475  cl_event gpuExec;
31476 
31477  checkError (clEnqueueNDRangeKernel (queue, kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31478 
31479  clWaitForEvents(1, &gpuExec);
31480 
31481  double *result = (double *) malloc(typesz);
31482  checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31483 
31484  v->assign(result, result+sz);
31485 
31486  if (typesz2 != typesz or sz != sz2){
31487  double *result2;
31488  result2 = (double *) malloc(typesz2);
31489  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31490 
31491  v2->assign(result2, result2+sz2);
31492  free(result2);
31493  }
31494  else{
31495  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31496 
31497  v2->assign(result, result+sz2);
31498  }
31499 
31500  if (typesz3 != typesz or sz != sz3){
31501  double *result3;
31502  result3 = (double *) malloc(typesz3);
31503  checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
31504 
31505  v3->assign(result3, result3+sz3);
31506  free(result3);
31507  }
31508  else{
31509  checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31510 
31511  v3->assign(result, result+sz3);
31512  }
31513 
31514  clReleaseCommandQueue (queue);
31515  clReleaseMemObject(buffer);
31516  clReleaseMemObject(buffer2);
31517  clReleaseMemObject(buffer3);
31518  clReleaseEvent(gpuExec);
31519  free(result);
31520  }
31521 }
void checkError(const cl_int error)
The function that generates ROS_WARN messages based on OpenCL error codes.
Definition: ros_opencl.cpp:35
std::string LoadKernel(const char *name)
The function that loads the OpenCL kernel.
Definition: ros_opencl.cpp:215
std::string getPlatformName(const cl_platform_id id)
Gets the platform name based on its ID.
Definition: ros_opencl.cpp:13
The ROS_OpenCL class.
Definition: ros_opencl.hpp:26
std::string getDeviceName(const cl_device_id id)
Gets the device name based on its ID.
Definition: ros_opencl.cpp:24
ROS_OpenCL()
Default (and empty) constructor.
Definition: ros_opencl.hpp:91
~ROS_OpenCL()
Destroys a ROS_OpenCL object.
Definition: ros_opencl.cpp:291
void clean()
The function that clears all allocated memory for OpenCL objects.
Definition: ros_opencl.cpp:7
The ROS_OpenCL_Params helper class.
sensor_msgs::PointCloud2 process(const sensor_msgs::PointCloud2 &msg)
The function that initiates kernel processing.
Definition: ros_opencl.cpp:305
cl_program createProgram(const std::string &source, const cl_context context)
Creates an Opencl program object.
Definition: ros_opencl.cpp:221
std::vector< cl_device_id > deviceIds
Definition: ros_opencl.hpp:32
ROS_OpenCL operator=(ROS_OpenCL *s)
Operator= overload.
Definition: ros_opencl.cpp:297